-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[LV] Add ExtNegatedMulAccReduction expression type #160154
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-vectorizers Author: Sam Tebbs (SamTebbs33) ChangesThis PR adds the ExtNegatedMulAccReduction expression type for VPExpressionRecipe so that extend-multiply-accumulate reductions with a negated multiply can be bundled. Stacked PRs:
Full diff: https://github.com/llvm/llvm-project/pull/160154.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e6f6067bc9df3..1cb0c889528ec 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2989,6 +2989,12 @@ class VPExpressionRecipe : public VPSingleDefRecipe {
/// vector operands, performing a reduction.add on the result, and adding
/// the scalar result to a chain.
MulAccReduction,
+ /// Represent an inloop multiply-accumulate reduction, multiplying the
+ /// extended vector operands, negating the multiplication, performing a
+ /// reduction.add
+ /// on the result, and adding
+ /// the scalar result to a chain.
+ ExtNegatedMulAccReduction,
};
/// Type of the expression.
@@ -3012,6 +3018,11 @@ class VPExpressionRecipe : public VPSingleDefRecipe {
VPWidenRecipe *Mul, VPReductionRecipe *Red)
: VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
{Ext0, Ext1, Mul, Red}) {}
+ VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
+ VPWidenRecipe *Mul, VPWidenRecipe *Sub,
+ VPReductionRecipe *Red)
+ : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
+ {Ext0, Ext1, Mul, Sub, Red}) {}
~VPExpressionRecipe() override {
SmallSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 4568d4f37a751..02be0db102547 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2861,12 +2861,17 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF,
return Ctx.TTI.getMulAccReductionCost(false, Opcode, RedTy, SrcVecTy,
Ctx.CostKind);
- case ExpressionTypes::ExtMulAccReduction:
+ case ExpressionTypes::ExtNegatedMulAccReduction:
+ case ExpressionTypes::ExtMulAccReduction: {
+ if (ExpressionType == ExpressionTypes::ExtNegatedMulAccReduction &&
+ Opcode == Instruction::Add)
+ Opcode = Instruction::Sub;
return Ctx.TTI.getMulAccReductionCost(
cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() ==
Instruction::ZExt,
Opcode, RedTy, SrcVecTy, Ctx.CostKind);
}
+ }
llvm_unreachable("Unknown VPExpressionRecipe::ExpressionTypes enum");
}
@@ -2912,6 +2917,30 @@ void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent,
O << ")";
break;
}
+ case ExpressionTypes::ExtNegatedMulAccReduction: {
+ getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker);
+ O << " + reduce."
+ << Instruction::getOpcodeName(
+ RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()))
+ << " (sub (0, mul";
+ auto *Mul = cast<VPWidenRecipe>(ExpressionRecipes[2]);
+ Mul->printFlags(O);
+ O << "(";
+ getOperand(0)->printAsOperand(O, SlotTracker);
+ auto *Ext0 = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
+ O << " " << Instruction::getOpcodeName(Ext0->getOpcode()) << " to "
+ << *Ext0->getResultType() << "), (";
+ getOperand(1)->printAsOperand(O, SlotTracker);
+ auto *Ext1 = cast<VPWidenCastRecipe>(ExpressionRecipes[1]);
+ O << " " << Instruction::getOpcodeName(Ext1->getOpcode()) << " to "
+ << *Ext1->getResultType() << ")";
+ if (Red->isConditional()) {
+ O << ", ";
+ Red->getCondOp()->printAsOperand(O, SlotTracker);
+ }
+ O << "))";
+ break;
+ }
case ExpressionTypes::MulAccReduction:
case ExpressionTypes::ExtMulAccReduction: {
getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 1f6b85270607e..ca89c4fa0d2e6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3524,14 +3524,22 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
};
VPValue *VecOp = Red->getVecOp();
+ VPValue *Mul = nullptr;
+ VPValue *Sub = nullptr;
VPValue *A, *B;
+ // Sub reductions could have a sub between the add reduction and vec op.
+ if (match(VecOp,
+ m_Binary<Instruction::Sub>(m_SpecificInt(0), m_VPValue(Mul))))
+ Sub = VecOp;
+ else
+ Mul = VecOp;
// Try to match reduce.add(mul(...)).
- if (match(VecOp, m_Mul(m_VPValue(A), m_VPValue(B)))) {
+ if (match(Mul, m_Mul(m_VPValue(A), m_VPValue(B)))) {
auto *RecipeA =
dyn_cast_if_present<VPWidenCastRecipe>(A->getDefiningRecipe());
auto *RecipeB =
dyn_cast_if_present<VPWidenCastRecipe>(B->getDefiningRecipe());
- auto *Mul = cast<VPWidenRecipe>(VecOp->getDefiningRecipe());
+ auto *MulR = cast<VPWidenRecipe>(Mul->getDefiningRecipe());
// Match reduce.add(mul(ext, ext)).
if (RecipeA && RecipeB &&
@@ -3540,29 +3548,35 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
match(RecipeB, m_ZExtOrSExt(m_VPValue())) &&
IsMulAccValidAndClampRange(RecipeA->getOpcode() ==
Instruction::CastOps::ZExt,
- Mul, RecipeA, RecipeB, nullptr)) {
- return new VPExpressionRecipe(RecipeA, RecipeB, Mul, Red);
+ MulR, RecipeA, RecipeB, nullptr)) {
+ if (Sub)
+ return new VPExpressionRecipe(
+ RecipeA, RecipeB, MulR,
+ cast<VPWidenRecipe>(Sub->getDefiningRecipe()), Red);
+ return new VPExpressionRecipe(RecipeA, RecipeB, MulR, Red);
}
// Match reduce.add(mul).
- if (IsMulAccValidAndClampRange(true, Mul, nullptr, nullptr, nullptr))
- return new VPExpressionRecipe(Mul, Red);
+ // TODO: Add an expression type for this variant with a negated mul
+ if (!Sub &&
+ IsMulAccValidAndClampRange(true, MulR, nullptr, nullptr, nullptr))
+ return new VPExpressionRecipe(MulR, Red);
}
// Match reduce.add(ext(mul(ext(A), ext(B)))).
// All extend recipes must have same opcode or A == B
// which can be transform to reduce.add(zext(mul(sext(A), sext(B)))).
- if (match(VecOp, m_ZExtOrSExt(m_Mul(m_ZExtOrSExt(m_VPValue()),
- m_ZExtOrSExt(m_VPValue()))))) {
+ if (!Sub && match(Mul, m_ZExtOrSExt(m_Mul(m_ZExtOrSExt(m_VPValue()),
+ m_ZExtOrSExt(m_VPValue()))))) {
auto *Ext = cast<VPWidenCastRecipe>(VecOp->getDefiningRecipe());
- auto *Mul = cast<VPWidenRecipe>(Ext->getOperand(0)->getDefiningRecipe());
+ auto *MulR = cast<VPWidenRecipe>(Ext->getOperand(0)->getDefiningRecipe());
auto *Ext0 =
- cast<VPWidenCastRecipe>(Mul->getOperand(0)->getDefiningRecipe());
+ cast<VPWidenCastRecipe>(MulR->getOperand(0)->getDefiningRecipe());
auto *Ext1 =
- cast<VPWidenCastRecipe>(Mul->getOperand(1)->getDefiningRecipe());
+ cast<VPWidenCastRecipe>(MulR->getOperand(1)->getDefiningRecipe());
if ((Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
Ext0->getOpcode() == Ext1->getOpcode() &&
IsMulAccValidAndClampRange(Ext0->getOpcode() ==
Instruction::CastOps::ZExt,
- Mul, Ext0, Ext1, Ext)) {
+ MulR, Ext0, Ext1, Ext)) {
auto *NewExt0 = new VPWidenCastRecipe(
Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), *Ext0,
Ext0->getDebugLoc());
@@ -3575,10 +3589,10 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
Ext1->getDebugLoc());
NewExt1->insertBefore(Ext1);
}
- Mul->setOperand(0, NewExt0);
- Mul->setOperand(1, NewExt1);
- Red->setOperand(1, Mul);
- return new VPExpressionRecipe(NewExt0, NewExt1, Mul, Red);
+ MulR->setOperand(0, NewExt0);
+ MulR->setOperand(1, NewExt1);
+ Red->setOperand(1, MulR);
+ return new VPExpressionRecipe(NewExt0, NewExt1, MulR, Red);
}
}
return nullptr;
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
index db64eb32f0320..06b044872c217 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
@@ -580,6 +580,127 @@ exit:
ret i32 %add
}
+define i32 @print_mulacc_negated(ptr %a, ptr %b) {
+; CHECK-LABEL: 'print_mulacc_negated'
+; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
+; CHECK-NEXT: Live-in vp<%0> = VF
+; CHECK-NEXT: Live-in vp<%1> = VF * UF
+; CHECK-NEXT: Live-in vp<%2> = vector-trip-count
+; CHECK-NEXT: Live-in ir<1024> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: EMIT vp<%3> = reduction-start-vector ir<0>, ir<0>, ir<1>
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
+; CHECK-NEXT: <x1> vector loop: {
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT vp<%4> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi vp<%3>, vp<%8>
+; CHECK-NEXT: vp<%5> = SCALAR-STEPS vp<%4>, ir<1>, vp<%0>
+; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<%5>
+; CHECK-NEXT: vp<%6> = vector-pointer ir<%gep.a>
+; CHECK-NEXT: WIDEN ir<%load.a> = load vp<%6>
+; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<%5>
+; CHECK-NEXT: vp<%7> = vector-pointer ir<%gep.b>
+; CHECK-NEXT: WIDEN ir<%load.b> = load vp<%7>
+; CHECK-NEXT: EXPRESSION vp<%8> = ir<%accum> + reduce.add (sub (0, mul (ir<%load.b> zext to i32), (ir<%load.a> zext to i32)))
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%4>, vp<%1>
+; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): middle.block
+; CHECK-EMPTY:
+; CHECK-NEXT: middle.block:
+; CHECK-NEXT: EMIT vp<%10> = compute-reduction-result ir<%accum>, vp<%8>
+; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<1024>, vp<%2>
+; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
+; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<exit>:
+; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %loop ] (extra operand: vp<%10> from middle.block)
+; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%2>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%10>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph)
+; CHECK-NEXT: IR %accum = phi i32 [ 0, %entry ], [ %add, %loop ] (extra operand: vp<%bc.merge.rdx> from scalar.ph)
+; CHECK-NEXT: IR %gep.a = getelementptr i8, ptr %a, i64 %iv
+; CHECK-NEXT: IR %load.a = load i8, ptr %gep.a, align 1
+; CHECK-NEXT: IR %ext.a = zext i8 %load.a to i32
+; CHECK-NEXT: IR %gep.b = getelementptr i8, ptr %b, i64 %iv
+; CHECK-NEXT: IR %load.b = load i8, ptr %gep.b, align 1
+; CHECK-NEXT: IR %ext.b = zext i8 %load.b to i32
+; CHECK-NEXT: IR %mul = mul i32 %ext.b, %ext.a
+; CHECK-NEXT: IR %sub = sub i32 0, %mul
+; CHECK-NEXT: IR %add = add i32 %accum, %sub
+; CHECK-NEXT: IR %iv.next = add i64 %iv, 1
+; CHECK-NEXT: IR %exitcond.not = icmp eq i64 %iv.next, 1024
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK: VPlan 'Final VPlan for VF={4},UF={1}' {
+; CHECK-NEXT: Live-in ir<1024> = vector-trip-count
+; CHECK-NEXT: Live-in ir<1024> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: Successor(s): vector.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi ir<0>, ir<%add>
+; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<%index>
+; CHECK-NEXT: WIDEN ir<%load.a> = load ir<%gep.a>
+; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<%index>
+; CHECK-NEXT: WIDEN ir<%load.b> = load ir<%gep.b>
+; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32
+; CHECK-NEXT: WIDEN-CAST ir<%ext.a> = zext ir<%load.a> to i32
+; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a>
+; CHECK-NEXT: WIDEN ir<%sub> = sub ir<0>, ir<%mul>
+; CHECK-NEXT: REDUCE ir<%add> = ir<%accum> + reduce.add (ir<%sub>)
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<4>
+; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, ir<1024>
+; CHECK-NEXT: Successor(s): middle.block, vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: middle.block:
+; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<%accum>, ir<%add>
+; CHECK-NEXT: Successor(s): ir-bb<exit>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<exit>:
+; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %loop ] (extra operand: vp<[[RED_RESULT]]> from middle.block)
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %accum = phi i32 [ 0, %entry ], [ %add, %loop ]
+ %gep.a = getelementptr i8, ptr %a, i64 %iv
+ %load.a = load i8, ptr %gep.a, align 1
+ %ext.a = zext i8 %load.a to i32
+ %gep.b = getelementptr i8, ptr %b, i64 %iv
+ %load.b = load i8, ptr %gep.b, align 1
+ %ext.b = zext i8 %load.b to i32
+ %mul = mul i32 %ext.b, %ext.a
+ %sub = sub i32 0, %mul
+ %add = add i32 %accum, %sub
+ %iv.next = add i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+ ret i32 %add
+}
+
define i64 @print_mulacc_sub_extended(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) {
; CHECK-LABEL: 'print_mulacc_sub_extended'
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
|
/// reduction.add | ||
/// on the result, and adding | ||
/// the scalar result to a chain. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: strange formatting
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
VPWidenRecipe *Mul, VPWidenRecipe *Sub, | ||
VPReductionRecipe *Red) | ||
: VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction, | ||
{Ext0, Ext1, Mul, Sub, Red}) {} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this should assert that Red
is an add
, Sub
is a sub(0, ...)
and Mul
is a Instruction::Mul
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
case ExpressionTypes::ExtNegatedMulAccReduction: | ||
case ExpressionTypes::ExtMulAccReduction: { | ||
if (ExpressionType == ExpressionTypes::ExtNegatedMulAccReduction && | ||
Opcode == Instruction::Add) | ||
Opcode = Instruction::Sub; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does it ever happen that Opcode != Instruction::Add
? If not, this should be written as:
case ExpressionTypes::ExtNegatedMulAccReduction: | |
case ExpressionTypes::ExtMulAccReduction: { | |
if (ExpressionType == ExpressionTypes::ExtNegatedMulAccReduction && | |
Opcode == Instruction::Add) | |
Opcode = Instruction::Sub; | |
case ExpressionTypes::ExtNegatedMulAccReduction: | |
assert(Opcode == Instruction::Add && "Unexpected opcode"); | |
Opcode = Instruction::Sub; | |
LLVM_FALLTHROUGH; | |
case ExpressionTypes::ExtMulAccReduction: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is possible for it to be a sub, such as in the print_mulacc_negated test in vplan-printing-reductions.ll, where the reduction is classified as having the Sub
opcode.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Addressed now, thanks!
if (match(VecOp, | ||
m_Binary<Instruction::Sub>(m_SpecificInt(0), m_VPValue(Mul)))) | ||
Sub = VecOp; | ||
else | ||
Mul = VecOp; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is a lot of renaming going on, making this change a lot bigger than it should be. If you would write:
VPValue *Sub = nullptr;
// Sub reductions could have a sub between the add reduction and vec op.
if (match(VecOp,
m_Binary<Instruction::Sub>(m_SpecificInt(0), m_VPValue(VecOp))))
Sub = Red->getVecOp();
Then you can do away with the changes that just rename VecOp
-> Mul
and Mul -> MulR
.
I mentioned this in a previous review, but this suggestion somehow got reverted again.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
// which can be transform to reduce.add(zext(mul(sext(A), sext(B)))). | ||
if (match(VecOp, m_ZExtOrSExt(m_Mul(m_ZExtOrSExt(m_VPValue()), | ||
m_ZExtOrSExt(m_VPValue()))))) { | ||
if (!Sub && match(Mul, m_ZExtOrSExt(m_Mul(m_ZExtOrSExt(m_VPValue()), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This also needs a TODO that a new expression type for sub-reductions is needed for this variant.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with nits addressed
VPValue *A, *B; | ||
// Sub reductions could have a sub between the add reduction and vec op. | ||
if (match(VecOp, m_Binary<Instruction::Sub>(m_SpecificInt(0), m_VPValue()))) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
just a minor nit: is it worth creating a VPValue *Tmp
, and matching m_VPValue(Tmp)
, and then doing VecOp = Tmp;
rather than getting operand 1 from the defining recipe?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's nice, done.
@@ -3524,7 +3524,13 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, | |||
}; | |||
|
|||
VPValue *VecOp = Red->getVecOp(); | |||
VPValue *Sub = nullptr; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: Is it worth making Sub
a VPRecipeBase
? (since that's the only use-case of it)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
if (match(VecOp, m_ZExtOrSExt(m_Mul(m_ZExtOrSExt(m_VPValue()), | ||
m_ZExtOrSExt(m_VPValue()))))) { | ||
// TODO: Add an expression type for this variant with a negated mul | ||
if (!Sub && match(VecOp, m_ZExtOrSExt(m_Mul(m_ZExtOrSExt(m_VPValue()), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: rather than doing !Sub &&
here, I would just have an explicit if (Sub) return nullptr;
in case other cases get added in the future that may also not support the sub
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That makes sense to me, done.
This PR adds the ExtNegatedMulAccReduction expression type for VPExpressionRecipe so that extend-multiply-accumulate reductions with a negated multiply can be bundled. Stacked PRs: 1. llvm#156976 2. -> This 3. llvm#147302
0462bca
to
1987159
Compare
auto *SubConst = dyn_cast<ConstantInt>(getOperand(2)->getLiveInIRValue()); | ||
assert(SubConst && SubConst->getValue() == 0 && | ||
Sub->getOpcode() == Instruction::Sub && "Expected a negating sub"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This will result in a warning when assertions are disabled.
You can probably use something like assert(match(getOperand(2), m_ZeroInt()))
but would have to move it to the .cpp file or mark as maybe_unused
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for spotting that, I'm making a fix for main now.
This PR adds the ExtNegatedMulAccReduction expression type for VPExpressionRecipe so that extend-multiply-accumulate reductions with a negated multiply can be bundled. Stacked PRs: 1. llvm/llvm-project#156976 2. -> llvm/llvm-project#160154 3. llvm/llvm-project#147302
#160154 added an assertion using a new variable, which caused a warning in builds without asserts. This patch adds [[maybe_unused]] to prevent that warning.
llvm/llvm-project#160154 added an assertion using a new variable, which caused a warning in builds without asserts. This patch adds [[maybe_unused]] to prevent that warning.
This PR adds the ExtNegatedMulAccReduction expression type for VPExpressionRecipe so that extend-multiply-accumulate reductions with a negated multiply can be bundled. Stacked PRs: 1. llvm#156976 2. -> llvm#160154 3. llvm#147302
llvm#160154 added an assertion using a new variable, which caused a warning in builds without asserts. This patch adds [[maybe_unused]] to prevent that warning.
This PR adds the ExtNegatedMulAccReduction expression type for VPExpressionRecipe so that extend-multiply-accumulate reductions with a negated multiply can be bundled.
Stacked PRs: