Skip to content

Commit

Permalink
Revert "[SLP] Make getSameOpcode support different instructions if th…
Browse files Browse the repository at this point in the history
…ey have same semantics. (#112181)"

This reverts commit 8220415.
  • Loading branch information
HanKuanChen committed Dec 13, 2024
1 parent 8220415 commit 3133acf
Show file tree
Hide file tree
Showing 14 changed files with 159 additions and 324 deletions.
242 changes: 35 additions & 207 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -842,123 +842,8 @@ class InstructionsState {
static InstructionsState invalid() { return {nullptr, nullptr}; }
};

/// One candidate form of an instruction: an opcode together with the operand
/// list to use with that opcode.
struct InterchangeableInstruction {
  /// Opcode of this candidate form.
  unsigned Opcode;
  /// Operands to use with \c Opcode.
  SmallVector<Value *> Ops;

  /// Builds a candidate from \p Opcode and forwards every remaining argument
  /// to the brace-initializer of \c Ops.
  template <class... ArgTypes>
  InterchangeableInstruction(unsigned Opcode, ArgTypes &&...Args)
      : Opcode(Opcode), Ops{std::forward<ArgTypes>(Args)...} {}
};

/// Orders candidates by opcode only; the operand lists do not take part in
/// the comparison.
bool operator<(const InterchangeableInstruction &LHS,
               const InterchangeableInstruction &RHS) {
  const unsigned LeftOpcode = LHS.Opcode;
  const unsigned RightOpcode = RHS.Opcode;
  return RightOpcode > LeftOpcode;
}

} // end anonymous namespace

/// \returns a list, sorted by instruction opcode, of the interchangeable
/// instructions that \p I can be converted to. The list always contains \p I
/// itself (its own opcode and operands). Additional candidates are produced
/// only for Shl, Mul, Sub and Add whose second operand is a constant integer
/// or a ConstantDataVector splat of one.
/// e.g.,
/// x << y -> x * (2^y)   (only when y is smaller than the bit width)
/// x << 1 -> x * 2
/// x << 0 -> x * 1 -> x - 0 -> x + 0 -> x & 11...1 -> x | 0
/// x * 0 -> x & 0
/// x * -1 -> 0 - x
/// TODO: support more patterns
static SmallVector<InterchangeableInstruction>
getInterchangeableInstruction(Instruction *I) {
  // PII = Possible Interchangeable Instruction
  SmallVector<InterchangeableInstruction> PII;
  unsigned Opcode = I->getOpcode();
  PII.emplace_back(Opcode, I->operands());
  if (!is_contained({Instruction::Shl, Instruction::Mul, Instruction::Sub,
                     Instruction::Add},
                    Opcode))
    return PII;
  Constant *C;
  if (match(I, m_BinOp(m_Value(), m_Constant(C)))) {
    // Accept a scalar constant integer or a splat of one.
    ConstantInt *V = nullptr;
    if (auto *CI = dyn_cast<ConstantInt>(C)) {
      V = CI;
    } else if (auto *CDV = dyn_cast<ConstantDataVector>(C)) {
      if (auto *CI = dyn_cast_if_present<ConstantInt>(CDV->getSplatValue()))
        V = CI;
    }
    if (!V)
      return PII;
    Value *Op0 = I->getOperand(0);
    Type *Op1Ty = I->getOperand(1)->getType();
    const APInt &Op1Int = V->getValue();
    const unsigned BitWidth = Op1Int.getBitWidth();
    Constant *Zero = ConstantInt::get(Op1Ty, APInt::getZero(BitWidth));
    Constant *UnsignedMax =
        ConstantInt::get(Op1Ty, APInt::getMaxValue(BitWidth));
    switch (Opcode) {
    case Instruction::Shl: {
      // Build 2^y at the full operand width. A host `int` shift would
      // overflow for y >= 31 and yield a wrong multiplier for wide types. A
      // shift amount >= the bit width has no multiply equivalent, and the
      // guard also keeps getZExtValue() from asserting on wide constants.
      if (Op1Int.ult(BitWidth)) {
        const unsigned ShiftAmount =
            static_cast<unsigned>(Op1Int.getZExtValue());
        PII.emplace_back(
            Instruction::Mul, Op0,
            ConstantInt::get(Op1Ty,
                             APInt::getOneBitSet(BitWidth, ShiftAmount)));
      }
      if (Op1Int.isZero()) {
        PII.emplace_back(Instruction::Sub, Op0, Zero);
        PII.emplace_back(Instruction::Add, Op0, Zero);
        PII.emplace_back(Instruction::And, Op0, UnsignedMax);
        PII.emplace_back(Instruction::Or, Op0, Zero);
      }
      break;
    }
    case Instruction::Mul: {
      if (Op1Int.isOne()) {
        PII.emplace_back(Instruction::Sub, Op0, Zero);
        PII.emplace_back(Instruction::Add, Op0, Zero);
        PII.emplace_back(Instruction::And, Op0, UnsignedMax);
        PII.emplace_back(Instruction::Or, Op0, Zero);
      } else if (Op1Int.isZero()) {
        PII.emplace_back(Instruction::And, Op0, Zero);
      } else if (Op1Int.isAllOnes()) {
        PII.emplace_back(Instruction::Sub, Zero, Op0);
      }
      break;
    }
    case Instruction::Sub:
      if (Op1Int.isZero()) {
        PII.emplace_back(Instruction::Add, Op0, Zero);
        PII.emplace_back(Instruction::And, Op0, UnsignedMax);
        PII.emplace_back(Instruction::Or, Op0, Zero);
      }
      break;
    case Instruction::Add:
      if (Op1Int.isZero()) {
        PII.emplace_back(Instruction::And, Op0, UnsignedMax);
        PII.emplace_back(Instruction::Or, Op0, Zero);
      }
      break;
    }
  }
  // std::set_intersection requires a sorted range.
  sort(PII);
  return PII;
}

/// \returns the instruction (\p MainOp or \p AltOp) that \p I is converted
/// to, paired with the operands \p I takes in that form. \p MainOp's opcode
/// is looked up first; \p AltOp's opcode is consulted only when \p I has no
/// candidate matching \p MainOp.
static std::pair<Value *, SmallVector<Value *>>
getInterchangeableInstruction(Instruction *I, Instruction *MainOp,
                              Instruction *AltOp) {
  SmallVector<InterchangeableInstruction> Candidates =
      getInterchangeableInstruction(I);
  // Locates the candidate whose opcode equals Opc, or end() if there is none.
  auto FindByOpcode = [&Candidates](unsigned Opc) {
    return find_if(Candidates, [Opc](const InterchangeableInstruction &Cand) {
      return Cand.Opcode == Opc;
    });
  };
  if (const auto *MainIt = FindByOpcode(MainOp->getOpcode());
      MainIt != Candidates.end())
    return std::make_pair(MainOp, MainIt->Ops);
  const auto *AltIt = FindByOpcode(AltOp->getOpcode());
  assert(AltIt != Candidates.end() &&
         "Cannot find an interchangeable instruction.");
  return std::make_pair(AltOp, AltIt->Ops);
}

/// \returns true if \p Opcode is allowed as part of the main/alternate
/// instruction for SLP vectorization.
///
Expand Down Expand Up @@ -1072,22 +957,6 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
return InstructionsState::invalid();
}
bool AnyPoison = InstCnt != VL.size();
// Currently, this is only used for binary ops.
// TODO: support all instructions
SmallVector<InterchangeableInstruction> InterchangeableOpcode =
getInterchangeableInstruction(cast<Instruction>(V));
SmallVector<InterchangeableInstruction> AlternateInterchangeableOpcode;
auto UpdateInterchangeableOpcode =
[](SmallVector<InterchangeableInstruction> &LHS,
ArrayRef<InterchangeableInstruction> RHS) {
SmallVector<InterchangeableInstruction> NewInterchangeableOpcode;
std::set_intersection(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
std::back_inserter(NewInterchangeableOpcode));
if (NewInterchangeableOpcode.empty())
return false;
LHS.swap(NewInterchangeableOpcode);
return true;
};
for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) {
auto *I = dyn_cast<Instruction>(VL[Cnt]);
if (!I)
Expand All @@ -1100,32 +969,14 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
return InstructionsState::invalid();
unsigned InstOpcode = I->getOpcode();
if (IsBinOp && isa<BinaryOperator>(I)) {
SmallVector<InterchangeableInstruction> ThisInterchangeableOpcode(
getInterchangeableInstruction(I));
if (UpdateInterchangeableOpcode(InterchangeableOpcode,
ThisInterchangeableOpcode))
if (InstOpcode == Opcode || InstOpcode == AltOpcode)
continue;
if (AlternateInterchangeableOpcode.empty()) {
InterchangeableOpcode.erase(
remove_if(InterchangeableOpcode,
[](const InterchangeableInstruction &I) {
return !isValidForAlternation(I.Opcode);
}),
InterchangeableOpcode.end());
ThisInterchangeableOpcode.erase(
remove_if(ThisInterchangeableOpcode,
[](const InterchangeableInstruction &I) {
return !isValidForAlternation(I.Opcode);
}),
ThisInterchangeableOpcode.end());
if (InterchangeableOpcode.empty() || ThisInterchangeableOpcode.empty())
return InstructionsState::invalid();
AlternateInterchangeableOpcode.swap(ThisInterchangeableOpcode);
if (Opcode == AltOpcode && isValidForAlternation(InstOpcode) &&
isValidForAlternation(Opcode)) {
AltOpcode = InstOpcode;
AltIndex = Cnt;
continue;
}
if (UpdateInterchangeableOpcode(AlternateInterchangeableOpcode,
ThisInterchangeableOpcode))
continue;
} else if (IsCastOp && isa<CastInst>(I)) {
Value *Op0 = IBase->getOperand(0);
Type *Ty0 = Op0->getType();
Expand Down Expand Up @@ -1226,24 +1077,6 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
return InstructionsState::invalid();
}

if (IsBinOp) {
auto FindOp = [&](ArrayRef<InterchangeableInstruction> CandidateOp) {
for (Value *V : VL) {
if (isa<PoisonValue>(V))
continue;
for (const InterchangeableInstruction &I : CandidateOp)
if (cast<Instruction>(V)->getOpcode() == I.Opcode)
return cast<Instruction>(V);
}
llvm_unreachable(
"Cannot find the candidate instruction for InstructionsState.");
};
Instruction *MainOp = FindOp(InterchangeableOpcode);
Instruction *AltOp = AlternateInterchangeableOpcode.empty()
? MainOp
: FindOp(AlternateInterchangeableOpcode);
return InstructionsState(MainOp, AltOp);
}
return InstructionsState(cast<Instruction>(V),
cast<Instruction>(VL[AltIndex]));
}
Expand Down Expand Up @@ -2574,46 +2407,42 @@ class BoUpSLP {
}

/// Go through the instructions in VL and append their operands.
void appendOperandsOfVL(ArrayRef<Value *> VL, Instruction *MainOp,
Instruction *AltOp) {
void appendOperandsOfVL(ArrayRef<Value *> VL, Instruction *VL0) {
assert(!VL.empty() && "Bad VL");
assert((empty() || VL.size() == getNumLanes()) &&
"Expected same number of lanes");
// IntrinsicInst::isCommutative returns true if swapping the first "two"
// arguments to the intrinsic produces the same result.
constexpr unsigned IntrinsicNumOperands = 2;
unsigned NumOperands = MainOp->getNumOperands();
ArgSize = isa<IntrinsicInst>(MainOp) ? IntrinsicNumOperands : NumOperands;
unsigned NumOperands = VL0->getNumOperands();
ArgSize = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands : NumOperands;
OpsVec.resize(NumOperands);
unsigned NumLanes = VL.size();
for (unsigned OpIdx : seq<unsigned>(NumOperands))
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
OpsVec[OpIdx].resize(NumLanes);
for (auto [Lane, V] : enumerate(VL)) {
assert((isa<Instruction>(V) || isa<PoisonValue>(V)) &&
"Expected instruction or poison value");
if (isa<PoisonValue>(V)) {
for (unsigned OpIdx : seq<unsigned>(NumOperands))
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
assert((isa<Instruction>(VL[Lane]) || isa<PoisonValue>(VL[Lane])) &&
"Expected instruction or poison value");
// Our tree has just 3 nodes: the root and two operands.
// It is therefore trivial to get the APO. We only need to check the
// opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
// RHS operand. The LHS operand of both add and sub is never attached
// to an inverse operation in the linearized form, therefore its APO
// is false. The RHS is true only if VL[Lane] is an inverse operation.

// Since operand reordering is performed on groups of commutative
// operations or alternating sequences (e.g., +, -), we can safely
// tell the inverse operations by checking commutativity.
if (isa<PoisonValue>(VL[Lane])) {
OpsVec[OpIdx][Lane] = {
PoisonValue::get(MainOp->getOperand(OpIdx)->getType()), true,
PoisonValue::get(VL0->getOperand(OpIdx)->getType()), true,
false};
continue;
}
auto [SelectedOp, Ops] =
getInterchangeableInstruction(cast<Instruction>(V), MainOp, AltOp);
// Our tree has just 3 nodes: the root and two operands.
// It is therefore trivial to get the APO. We only need to check the
// opcode of V and whether the operand at OpIdx is the LHS or RHS
// operand. The LHS operand of both add and sub is never attached to an
// inverse operation in the linearized form, therefore its APO is
// false. The RHS is true only if V is an inverse operation.

// Since operand reordering is performed on groups of commutative
// operations or alternating sequences (e.g., +, -), we can safely
// tell the inverse operations by checking commutativity.
bool IsInverseOperation = !isCommutative(cast<Instruction>(SelectedOp));
for (unsigned OpIdx : seq<unsigned>(NumOperands)) {
continue;
}
bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
bool APO = (OpIdx == 0) ? false : IsInverseOperation;
OpsVec[OpIdx][Lane] = {Ops[OpIdx], APO, false};
OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
APO, false};
}
}
}
Expand Down Expand Up @@ -2720,12 +2549,11 @@ class BoUpSLP {

public:
/// Initialize with all the operands of the instruction vector \p RootVL.
VLOperands(ArrayRef<Value *> RootVL, Instruction *MainOp,
Instruction *AltOp, const BoUpSLP &R)
VLOperands(ArrayRef<Value *> RootVL, Instruction *VL0, const BoUpSLP &R)
: TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R),
L(R.LI->getLoopFor(MainOp->getParent())) {
L(R.LI->getLoopFor((VL0->getParent()))) {
// Append all the operands of RootVL.
appendOperandsOfVL(RootVL, MainOp, AltOp);
appendOperandsOfVL(RootVL, VL0);
}

/// \Returns a value vector with the operands across all lanes for the
Expand Down Expand Up @@ -3517,7 +3345,7 @@ class BoUpSLP {

/// Set this bundle's operand from Scalars.
void setOperand(const BoUpSLP &R, bool RequireReorder = false) {
VLOperands Ops(Scalars, MainOp, AltOp, R);
VLOperands Ops(Scalars, MainOp, R);
if (RequireReorder)
Ops.reorder();
for (unsigned I : seq<unsigned>(MainOp->getNumOperands()))
Expand Down Expand Up @@ -8733,7 +8561,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");

ValueList Left, Right;
VLOperands Ops(VL, VL0, S.getAltOp(), *this);
VLOperands Ops(VL, VL0, *this);
if (cast<CmpInst>(VL0)->isCommutative()) {
// Commutative predicate - collect + sort operands of the instructions
// so that each side is more likely to have the same opcode.
Expand Down Expand Up @@ -15791,7 +15619,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
Value *V = Builder.CreateBinOp(
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
RHS);
propagateIRFlags(V, E->Scalars, nullptr, It == MinBWs.end());
propagateIRFlags(V, E->Scalars, VL0, It == MinBWs.end());
if (auto *I = dyn_cast<Instruction>(V)) {
V = ::propagateMetadata(I, E->Scalars);
// Drop nuw flags for abs(sub(commutative), true).
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
Original file line number Diff line number Diff line change
Expand Up @@ -314,10 +314,10 @@ define void @store_try_reorder(ptr %dst) {
;
; POW2-ONLY-LABEL: @store_try_reorder(
; POW2-ONLY-NEXT: entry:
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0
; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2
; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0
; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
; POW2-ONLY-NEXT: ret void
;
entry:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,19 @@ define void @test(ptr %a, i64 %0) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[TMP0]], i32 0
; CHECK-NEXT: br label %[[BB:.*]]
; CHECK: [[BB]]:
; CHECK-NEXT: [[TMP5:%.*]] = or disjoint <2 x i64> [[TMP3]], <i64 1, i64 0>
; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 0, i32 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr double, <2 x ptr> [[TMP2]], <2 x i64> [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x ptr> [[TMP6]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> splat (i1 true), <2 x double> poison)
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, ptr [[A]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, ptr [[A]], align 8
; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> splat (i1 true), <2 x double> poison)
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, ptr [[A]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, ptr [[A]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = fsub <2 x double> [[TMP8]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = fsub <2 x double> [[TMP7]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = fsub <2 x double> [[TMP9]], [[TMP11]]
; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v2f64.p0.i64(<2 x double> [[TMP12]], ptr align 8 [[TMP8]], i64 -8, <2 x i1> splat (i1 true), i32 2)
; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v2f64.p0.i64(<2 x double> [[TMP11]], ptr align 8 [[ARRAYIDX17_I28_1]], i64 -8, <2 x i1> splat (i1 true), i32 2)
; CHECK-NEXT: br label %[[BB]]
;
entry:
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
Original file line number Diff line number Diff line change
Expand Up @@ -324,10 +324,10 @@ define void @store_try_reorder(ptr %dst) {
;
; POW2-ONLY-LABEL: @store_try_reorder(
; POW2-ONLY-NEXT: entry:
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0
; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2
; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0
; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
; POW2-ONLY-NEXT: ret void
;
entry:
Expand Down
6 changes: 4 additions & 2 deletions llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@ define i32 @foo(ptr nocapture %A, i32 %n) {
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) @bar()
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 8, i32 10>
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 9)
; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9)
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4
; CHECK-NEXT: ret i32 undef
;
Expand Down
Loading

0 comments on commit 3133acf

Please sign in to comment.