Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -2997,6 +2997,10 @@ class VPExpressionRecipe : public VPSingleDefRecipe {
/// vector operands, performing a reduction.add on the result, and adding
/// the scalar result to a chain.
MulAccReduction,
/// Represent an inloop multiply-accumulate reduction, multiplying the
/// extended vector operands, negating the multiplication, performing a
/// reduction.add on the result, and adding the scalar result to a chain.
ExtNegatedMulAccReduction,
};

/// Type of the expression.
Expand All @@ -3020,6 +3024,19 @@ class VPExpressionRecipe : public VPSingleDefRecipe {
VPWidenRecipe *Mul, VPReductionRecipe *Red)
: VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
{Ext0, Ext1, Mul, Red}) {}
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
VPWidenRecipe *Mul, VPWidenRecipe *Sub,
VPReductionRecipe *Red)
: VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
{Ext0, Ext1, Mul, Sub, Red}) {
assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
assert(Red->getRecurrenceKind() == RecurKind::Add &&
"Expected an add reduction");
assert(getNumOperands() >= 3 && "Expected at least three operands");
auto *SubConst = dyn_cast<ConstantInt>(getOperand(2)->getLiveInIRValue());
assert(SubConst && SubConst->getValue() == 0 &&
Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
Comment on lines +3036 to +3038
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will result in a warning when assertions are disabled.

You can probably use something like assert(match(getOperand(2), m_ZeroInt())) but would have to move it to the .cpp file or mark as maybe_unused

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for spotting that, I'm making a fix for main now.

}

~VPExpressionRecipe() override {
for (auto *R : reverse(ExpressionRecipes))
Expand Down
31 changes: 30 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2839,12 +2839,17 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF,
return Ctx.TTI.getMulAccReductionCost(false, Opcode, RedTy, SrcVecTy,
Ctx.CostKind);

case ExpressionTypes::ExtMulAccReduction:
case ExpressionTypes::ExtNegatedMulAccReduction:
assert(Opcode == Instruction::Add && "Unexpected opcode");
Opcode = Instruction::Sub;
LLVM_FALLTHROUGH;
case ExpressionTypes::ExtMulAccReduction: {
return Ctx.TTI.getMulAccReductionCost(
cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() ==
Instruction::ZExt,
Opcode, RedTy, SrcVecTy, Ctx.CostKind);
}
}
llvm_unreachable("Unknown VPExpressionRecipe::ExpressionTypes enum");
}

Expand Down Expand Up @@ -2890,6 +2895,30 @@ void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent,
O << ")";
break;
}
case ExpressionTypes::ExtNegatedMulAccReduction: {
getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker);
O << " + reduce."
<< Instruction::getOpcodeName(
RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()))
<< " (sub (0, mul";
auto *Mul = cast<VPWidenRecipe>(ExpressionRecipes[2]);
Mul->printFlags(O);
O << "(";
getOperand(0)->printAsOperand(O, SlotTracker);
auto *Ext0 = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
O << " " << Instruction::getOpcodeName(Ext0->getOpcode()) << " to "
<< *Ext0->getResultType() << "), (";
getOperand(1)->printAsOperand(O, SlotTracker);
auto *Ext1 = cast<VPWidenCastRecipe>(ExpressionRecipes[1]);
O << " " << Instruction::getOpcodeName(Ext1->getOpcode()) << " to "
<< *Ext1->getResultType() << ")";
if (Red->isConditional()) {
O << ", ";
Red->getCondOp()->printAsOperand(O, SlotTracker);
}
O << "))";
break;
}
case ExpressionTypes::MulAccReduction:
case ExpressionTypes::ExtMulAccReduction: {
getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker);
Expand Down
19 changes: 18 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3543,7 +3543,15 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
};

VPValue *VecOp = Red->getVecOp();
VPRecipeBase *Sub = nullptr;
VPValue *A, *B;
VPValue *Tmp = nullptr;
// Sub reductions could have a sub between the add reduction and vec op.
if (match(VecOp,
m_Binary<Instruction::Sub>(m_SpecificInt(0), m_VPValue(Tmp)))) {
Sub = VecOp->getDefiningRecipe();
VecOp = Tmp;
}
// Try to match reduce.add(mul(...)).
if (match(VecOp, m_Mul(m_VPValue(A), m_VPValue(B)))) {
auto *RecipeA =
Expand All @@ -3560,12 +3568,21 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
IsMulAccValidAndClampRange(RecipeA->getOpcode() ==
Instruction::CastOps::ZExt,
Mul, RecipeA, RecipeB, nullptr)) {
if (Sub)
return new VPExpressionRecipe(RecipeA, RecipeB, Mul,
cast<VPWidenRecipe>(Sub), Red);
return new VPExpressionRecipe(RecipeA, RecipeB, Mul, Red);
}
// Match reduce.add(mul).
if (IsMulAccValidAndClampRange(true, Mul, nullptr, nullptr, nullptr))
// TODO: Add an expression type for this variant with a negated mul
if (!Sub &&
IsMulAccValidAndClampRange(true, Mul, nullptr, nullptr, nullptr))
return new VPExpressionRecipe(Mul, Red);
}
// TODO: Add an expression type for negated versions of other expression
// variants.
if (Sub)
return nullptr;
// Match reduce.add(ext(mul(ext(A), ext(B)))).
// All extend recipes must have same opcode or A == B
// which can be transform to reduce.add(zext(mul(sext(A), sext(B)))).
Expand Down
121 changes: 121 additions & 0 deletions llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,127 @@ exit:
ret i32 %add
}

define i32 @print_mulacc_negated(ptr %a, ptr %b) {
; CHECK-LABEL: 'print_mulacc_negated'
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
; CHECK-NEXT: Live-in vp<%0> = VF
; CHECK-NEXT: Live-in vp<%1> = VF * UF
; CHECK-NEXT: Live-in vp<%2> = vector-trip-count
; CHECK-NEXT: Live-in ir<1024> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: EMIT vp<%3> = reduction-start-vector ir<0>, ir<0>, ir<1>
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<%4> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi vp<%3>, vp<%8>
; CHECK-NEXT: vp<%5> = SCALAR-STEPS vp<%4>, ir<1>, vp<%0>
; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<%5>
; CHECK-NEXT: vp<%6> = vector-pointer ir<%gep.a>
; CHECK-NEXT: WIDEN ir<%load.a> = load vp<%6>
; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<%5>
; CHECK-NEXT: vp<%7> = vector-pointer ir<%gep.b>
; CHECK-NEXT: WIDEN ir<%load.b> = load vp<%7>
; CHECK-NEXT: EXPRESSION vp<%8> = ir<%accum> + reduce.add (sub (0, mul (ir<%load.b> zext to i32), (ir<%load.a> zext to i32)))
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%4>, vp<%1>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<%10> = compute-reduction-result ir<%accum>, vp<%8>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<1024>, vp<%2>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %loop ] (extra operand: vp<%10> from middle.block)
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%2>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%10>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop>:
; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph)
; CHECK-NEXT: IR %accum = phi i32 [ 0, %entry ], [ %add, %loop ] (extra operand: vp<%bc.merge.rdx> from scalar.ph)
; CHECK-NEXT: IR %gep.a = getelementptr i8, ptr %a, i64 %iv
; CHECK-NEXT: IR %load.a = load i8, ptr %gep.a, align 1
; CHECK-NEXT: IR %ext.a = zext i8 %load.a to i32
; CHECK-NEXT: IR %gep.b = getelementptr i8, ptr %b, i64 %iv
; CHECK-NEXT: IR %load.b = load i8, ptr %gep.b, align 1
; CHECK-NEXT: IR %ext.b = zext i8 %load.b to i32
; CHECK-NEXT: IR %mul = mul i32 %ext.b, %ext.a
; CHECK-NEXT: IR %sub = sub i32 0, %mul
; CHECK-NEXT: IR %add = add i32 %accum, %sub
; CHECK-NEXT: IR %iv.next = add i64 %iv, 1
; CHECK-NEXT: IR %exitcond.not = icmp eq i64 %iv.next, 1024
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK: VPlan 'Final VPlan for VF={4},UF={1}' {
; CHECK-NEXT: Live-in ir<1024> = vector-trip-count
; CHECK-NEXT: Live-in ir<1024> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector.body
; CHECK-EMPTY:
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi ir<0>, ir<%add>
; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<%index>
; CHECK-NEXT: WIDEN ir<%load.a> = load ir<%gep.a>
; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<%index>
; CHECK-NEXT: WIDEN ir<%load.b> = load ir<%gep.b>
; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32
; CHECK-NEXT: WIDEN-CAST ir<%ext.a> = zext ir<%load.a> to i32
; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a>
; CHECK-NEXT: WIDEN ir<%sub> = sub ir<0>, ir<%mul>
; CHECK-NEXT: REDUCE ir<%add> = ir<%accum> + reduce.add (ir<%sub>)
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<4>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, ir<1024>
; CHECK-NEXT: Successor(s): middle.block, vector.body
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<%accum>, ir<%add>
; CHECK-NEXT: Successor(s): ir-bb<exit>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %loop ] (extra operand: vp<[[RED_RESULT]]> from middle.block)
; CHECK-NEXT: No successors
; CHECK-NEXT: }
entry:
br label %loop

loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%accum = phi i32 [ 0, %entry ], [ %add, %loop ]
%gep.a = getelementptr i8, ptr %a, i64 %iv
%load.a = load i8, ptr %gep.a, align 1
%ext.a = zext i8 %load.a to i32
%gep.b = getelementptr i8, ptr %b, i64 %iv
%load.b = load i8, ptr %gep.b, align 1
%ext.b = zext i8 %load.b to i32
%mul = mul i32 %ext.b, %ext.a
%sub = sub i32 0, %mul
%add = add i32 %accum, %sub
%iv.next = add i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %exit, label %loop

exit:
ret i32 %add
}

define i64 @print_mulacc_sub_extended(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) {
; CHECK-LABEL: 'print_mulacc_sub_extended'
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
Expand Down