Skip to content

Commit

Permalink
[VPlan] Replace disjoint or with add instead of dropping disjoint. (#…
Browse files Browse the repository at this point in the history
…83821)

Dropping disjoint from an OR may yield incorrect results, as some
analysis may have converted it to an Add implicitly (e.g. SCEV used for
dependence analysis). Instead, replace it with an equivalent Add.

This is possible as all users of the disjoint OR only access lanes where
the operands are disjoint or poison otherwise.

Note that replacing all disjoint ORs with ADDs instead of dropping the
flags is not strictly necessary. It is only needed for disjoint ORs that
SCEV treated as ADDs, but those are not tracked.

There are other places that may drop poison-generating flags; those
likely need similar treatment.

Fixes llvm/llvm-project#81872


PR: llvm/llvm-project#83821
  • Loading branch information
fhahn authored Mar 19, 2024
1 parent a6e231b commit c2c1e6e
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 1 deletion.
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ class VPBuilder {
public:
VPBuilder() = default;
VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
VPBuilder(VPRecipeBase *InsertPt) {
setInsertPoint(InsertPt->getParent(), InsertPt->getIterator());
}

/// Clear the insertion point: created instructions will not be inserted into
/// a block.
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1127,6 +1127,12 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
return WrapFlags.HasNSW;
}

bool isDisjoint() const {
assert(OpType == OperationType::DisjointOp &&
"recipe cannot have a disjoing flag");
return DisjointFlags.IsDisjoint;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void printFlags(raw_ostream &O) const;
#endif
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,11 @@ m_Mul(const Op0_t &Op0, const Op1_t &Op1) {
return m_Binary<Instruction::Mul, Op0_t, Op1_t>(Op0, Op1);
}

template <typename Op0_t, typename Op1_t>
inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Or>
m_Or(const Op0_t &Op0, const Op1_t &Op1) {
return m_Binary<Instruction::Or, Op0_t, Op1_t>(Op0, Op1);
}
} // namespace VPlanPatternMatch
} // namespace llvm

Expand Down
17 changes: 17 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1216,6 +1216,23 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
// load/store. If the underlying instruction has poison-generating flags,
// drop them directly.
if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(CurRec)) {
VPValue *A, *B;
using namespace llvm::VPlanPatternMatch;
// Dropping disjoint from an OR may yield incorrect results, as some
// analysis may have converted it to an Add implicitly (e.g. SCEV used
// for dependence analysis). Instead, replace it with an equivalent Add.
// This is possible as all users of the disjoint OR only access lanes
// where the operands are disjoint or poison otherwise.
if (match(RecWithFlags, m_Or(m_VPValue(A), m_VPValue(B))) &&
RecWithFlags->isDisjoint()) {
VPBuilder Builder(RecWithFlags);
VPInstruction *New = Builder.createOverflowingOp(
Instruction::Add, {A, B}, {false, false},
RecWithFlags->getDebugLoc());
RecWithFlags->replaceAllUsesWith(New);
RecWithFlags->eraseFromParent();
CurRec = New;
}
RecWithFlags->dropPoisonGeneratingFlags();
} else {
Instruction *Instr = dyn_cast_or_null<Instruction>(
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[ARR]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 -3
Expand Down

0 comments on commit c2c1e6e

Please sign in to comment.