[VPlan] Preserve trunc nuw/nsw in VPRecipeWithIRFlags #144700
Conversation
This preserves the nuw/nsw flags on widened truncs by splitting OverflowingBinaryOperator into OverflowingOperator and OverflowingBinaryOperator. We could later go through the other users of OverflowingBinaryOperator and see if they can be generalized to OverflowingOperator. The motivation is to be able to fold away some redundant truncs feeding into uitofps (or potentially narrow the inductions feeding them).
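For illustration, a minimal sketch of the kind of fold this enables (hypothetical consumer-side code, not part of this patch; the UIToFP names are assumed). With nuw on the trunc, no set bits are discarded, so a uitofp user can consume the wide value directly and the trunc goes dead:

// Sketch only: assumes UIToFP is a uitofp instruction whose operand may
// be a widened trunc. Pattern targeted:
//   %t = trunc nuw <4 x i64> %iv to <4 x i32>
//   %f = uitofp <4 x i32> %t to <4 x float>
// =>
//   %f = uitofp <4 x i64> %iv to <4 x float>
if (auto *Trunc = dyn_cast<TruncInst>(UIToFP.getOperand(0)))
  if (Trunc->hasNoUnsignedWrap())
    return new UIToFPInst(Trunc->getOperand(0), UIToFP.getType());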
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-ir Author: Luke Lau (lukel97) Changes: This preserves the nuw/nsw flags on widened truncs by splitting OverflowingBinaryOperator into OverflowingOperator and OverflowingBinaryOperator. We could later go through the other users of OverflowingBinaryOperator and see if they can be generalized to OverflowingOperator. The motivation is to be able to fold away some redundant truncs feeding into uitofps (or potentially narrow the inductions feeding them). Full diff: https://github.com/llvm/llvm-project/pull/144700.diff 6 Files Affected:
diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
index 8344eaec807b3..13272ed2727ec 100644
--- a/llvm/include/llvm/IR/Operator.h
+++ b/llvm/include/llvm/IR/Operator.h
@@ -73,9 +73,9 @@ class Operator : public User {
};
/// Utility class for integer operators which may exhibit overflow - Add, Sub,
-/// Mul, and Shl. It does not include SDiv, despite that operator having the
-/// potential for overflow.
-class OverflowingBinaryOperator : public Operator {
+/// Mul, Shl and Trunc. It does not include SDiv, despite that operator having
+/// the potential for overflow.
+class OverflowingOperator : public Operator {
public:
enum {
AnyWrap = 0,
@@ -97,9 +97,6 @@ class OverflowingBinaryOperator : public Operator {
}
public:
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
/// Test whether this operation is known to never
/// undergo unsigned overflow, aka the nuw property.
bool hasNoUnsignedWrap() const {
@@ -131,11 +128,32 @@ class OverflowingBinaryOperator : public Operator {
return I->getOpcode() == Instruction::Add ||
I->getOpcode() == Instruction::Sub ||
I->getOpcode() == Instruction::Mul ||
- I->getOpcode() == Instruction::Shl;
+ I->getOpcode() == Instruction::Shl ||
+ I->getOpcode() == Instruction::Trunc;
}
static bool classof(const ConstantExpr *CE) {
return CE->getOpcode() == Instruction::Add ||
- CE->getOpcode() == Instruction::Sub;
+ CE->getOpcode() == Instruction::Sub ||
+ CE->getOpcode() == Instruction::Trunc;
+ }
+ static bool classof(const Value *V) {
+ return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
+ (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
+ }
+};
+
+/// The subset of OverflowingOperators that are also BinaryOperators.
+class OverflowingBinaryOperator : public OverflowingOperator {
+public:
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ static bool classof(const Instruction *I) {
+ return OverflowingOperator::classof(I) && isa<BinaryOperator>(I);
+ }
+ static bool classof(const ConstantExpr *CE) {
+ return OverflowingOperator::classof(CE) &&
+ Instruction::isBinaryOp(CE->getOpcode());
}
static bool classof(const Value *V) {
return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
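To show how the split hierarchy as posted in this diff would be queried (illustrative sketch only; review later removed the umbrella type, see the conversation below): a trunc now matches OverflowingOperator but not OverflowingBinaryOperator, so existing two-operand assumptions stay safe.

// Illustrative only; I is any Instruction *.
if (auto *OO = dyn_cast<OverflowingOperator>(I)) {
  // Matches add/sub/mul/shl and, with this patch, trunc.
  if (OO->hasNoUnsignedWrap() && OO->hasNoSignedWrap()) {
    // Both wrap flags present; safe for flag-based folds.
  }
}
if (isa<OverflowingBinaryOperator>(I)) {
  // Still excludes trunc: binary-operator operand assumptions remain valid.
}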
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index f3306ad7cb8ec..8b9a03cb941e9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -595,7 +595,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
class VPIRFlags {
enum class OperationType : unsigned char {
Cmp,
- OverflowingBinOp,
+ OverflowingOp,
DisjointOp,
PossiblyExactOp,
GEPOp,
@@ -661,8 +661,8 @@ class VPIRFlags {
} else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
OpType = OperationType::DisjointOp;
DisjointFlags.IsDisjoint = Op->isDisjoint();
- } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
- OpType = OperationType::OverflowingBinOp;
+ } else if (auto *Op = dyn_cast<OverflowingOperator>(&I)) {
+ OpType = OperationType::OverflowingOp;
WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
} else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
OpType = OperationType::PossiblyExactOp;
@@ -686,7 +686,7 @@ class VPIRFlags {
: OpType(OperationType::Cmp), CmpPredicate(Pred) {}
VPIRFlags(WrapFlagsTy WrapFlags)
- : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
+ : OpType(OperationType::OverflowingOp), WrapFlags(WrapFlags) {}
VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {}
@@ -710,7 +710,7 @@ class VPIRFlags {
// NOTE: This needs to be kept in-sync with
// Instruction::dropPoisonGeneratingFlags.
switch (OpType) {
- case OperationType::OverflowingBinOp:
+ case OperationType::OverflowingOp:
WrapFlags.HasNUW = false;
WrapFlags.HasNSW = false;
break;
@@ -739,7 +739,7 @@ class VPIRFlags {
/// Apply the IR flags to \p I.
void applyFlags(Instruction &I) const {
switch (OpType) {
- case OperationType::OverflowingBinOp:
+ case OperationType::OverflowingOp:
I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
I.setHasNoSignedWrap(WrapFlags.HasNSW);
break;
@@ -799,13 +799,13 @@ class VPIRFlags {
}
bool hasNoUnsignedWrap() const {
- assert(OpType == OperationType::OverflowingBinOp &&
+ assert(OpType == OperationType::OverflowingOp &&
"recipe doesn't have a NUW flag");
return WrapFlags.HasNUW;
}
bool hasNoSignedWrap() const {
- assert(OpType == OperationType::OverflowingBinOp &&
+ assert(OpType == OperationType::OverflowingOp &&
"recipe doesn't have a NSW flag");
return WrapFlags.HasNSW;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index f3b5c8cfa9885..96bb6bb0d37b5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1627,9 +1627,9 @@ VPIRFlags::FastMathFlagsTy::FastMathFlagsTy(const FastMathFlags &FMF) {
#if !defined(NDEBUG)
bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
switch (OpType) {
- case OperationType::OverflowingBinOp:
+ case OperationType::OverflowingOp:
return Opcode == Instruction::Add || Opcode == Instruction::Sub ||
- Opcode == Instruction::Mul ||
+ Opcode == Instruction::Mul || Opcode == Instruction::Trunc ||
Opcode == VPInstruction::VPInstruction::CanonicalIVIncrementForPart;
case OperationType::DisjointOp:
return Opcode == Instruction::Or;
@@ -1672,7 +1672,7 @@ void VPIRFlags::printFlags(raw_ostream &O) const {
if (ExactFlags.IsExact)
O << " exact";
break;
- case OperationType::OverflowingBinOp:
+ case OperationType::OverflowingOp:
if (WrapFlags.HasNUW)
O << " nuw";
if (WrapFlags.HasNSW)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index 976f95ff4f0ba..8bcfce8cc52c9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -1484,7 +1484,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) {
; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; DEFAULT-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; DEFAULT-NEXT: [[TMP0:%.*]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 1)
-; DEFAULT-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[TMP0]] to <4 x i32>
+; DEFAULT-NEXT: [[TMP1:%.*]] = trunc nuw nsw <4 x i64> [[TMP0]] to <4 x i32>
; DEFAULT-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
; DEFAULT-NEXT: store i32 [[TMP2]], ptr [[DST]], align 4
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -1521,7 +1521,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) {
; PRED-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
; PRED-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 20)
; PRED-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1)
-; PRED-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32>
+; PRED-NEXT: [[TMP2:%.*]] = trunc nuw nsw <4 x i64> [[TMP1]] to <4 x i32>
; PRED-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; PRED-NEXT: br i1 [[TMP3]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; PRED: [[PRED_STORE_IF]]:
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reg-pressure-vmla.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reg-pressure-vmla.ll
index 4c29a3a0d1d01..0a751699a0549 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reg-pressure-vmla.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reg-pressure-vmla.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^scalar.ph:" --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph:" --version 5
; RUN: opt -mattr=+mve -passes=loop-vectorize < %s -S -o - | FileCheck %s
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -49,7 +49,7 @@ define void @fn(i32 noundef %n, ptr %in, ptr %out) #0 {
; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw <4 x i32> [[TMP9]], [[TMP6]]
; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw <4 x i32> [[TMP10]], [[TMP8]]
; CHECK-NEXT: [[TMP12:%.*]] = lshr <4 x i32> [[TMP11]], splat (i32 16)
-; CHECK-NEXT: [[TMP13:%.*]] = trunc <4 x i32> [[TMP12]] to <4 x i8>
+; CHECK-NEXT: [[TMP13:%.*]] = trunc nuw <4 x i32> [[TMP12]] to <4 x i8>
; CHECK-NEXT: [[TMP14:%.*]] = mul nuw nsw <4 x i32> [[TMP3]], splat (i32 32767)
; CHECK-NEXT: [[TMP15:%.*]] = mul nuw <4 x i32> [[TMP5]], splat (i32 16762097)
; CHECK-NEXT: [[TMP16:%.*]] = mul nuw <4 x i32> [[TMP7]], splat (i32 16759568)
diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll
index 9032c363eb936..9d613b8fe456d 100644
--- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll
+++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll
@@ -41,7 +41,7 @@ define void @arm_mult_q15(ptr %pSrcA, ptr %pSrcB, ptr noalias %pDst, i32 %blockS
; CHECK-NEXT: [[TMP5:%.*]] = mul nsw <8 x i32> [[TMP4]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = ashr <8 x i32> [[TMP5]], splat (i32 15)
; CHECK-NEXT: [[TMP7:%.*]] = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[TMP6]], <8 x i32> splat (i32 32767))
-; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i16>
+; CHECK-NEXT: [[TMP8:%.*]] = trunc nsw <8 x i32> [[TMP7]] to <8 x i16>
; CHECK-NEXT: store <8 x i16> [[TMP8]], ptr [[NEXT_GEP14]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
I'd prefer not to have some umbrella type for Trunc + OverflowingBinaryOperator. Even if the flag names are the same, the semantics are very different. Can we instead add an extra OperationType in VPlan?
That makes sense, I've gone ahead and removed it. I reused the same OperationType in VPlan since it feels like it's mostly a tag for the underlying union.
LG. TBH nsw/nuw are bad names for trunc :(
@@ -595,7 +595,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
 class VPIRFlags {
   enum class OperationType : unsigned char {
     Cmp,
-    OverflowingBinOp,
+    WrappingOp,
If we keep them separate on the IR level, it probably also makes sense to keep them separate here, with a separate TruncOp kind for consistency?
I gave this a try but it ended up being quite hairy: a3bce75
Every time you create a VPIRFlags through the implicit WrapFlagsTy constructor, you now also need to check whether the thing you're passing in is a trunc in order to set the "correct" OperationType. But in reality it makes no difference whether the OperationType is OverflowingBinOp or TruncOp, because they behave the same.
We could maybe rename OperationType to FlagsKind if we want to emphasise that it only determines the type of flag storage. But I'm not strongly opinionated on this, happy to go with whatever reviewers prefer.
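To make the hairiness concrete, a hypothetical shape of what each construction site would need with a separate TruncOp kind (names illustrative, not taken from a3bce75):

// Illustrative: the implicit WrapFlagsTy constructor can no longer pick
// the OperationType by itself, so every caller dispatches on the opcode.
VPIRFlags Flags = Opcode == Instruction::Trunc
                      ? VPIRFlags(OperationType::TruncOp, {HasNUW, HasNSW})
                      : VPIRFlags({HasNUW, HasNSW}); // OverflowingBinOp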
Can we add TruncFlagsTy for trunc? I am not sure if WrapFlagsTy will be extended to handle more flags (e.g., nnuw #86061 (comment)).
Yeah that crossed my mind too, I've added it in 0a7258a and it seems to work well.
My only concern was the VPWidenRecipe(unsigned VPDefOpcode, unsigned Opcode, ArrayRef<VPValue *> Operands, bool NUW, bool NSW, DebugLoc DL) constructor, but I think the VPIRFlags::flagsValidForOpcode check should catch any cases where a trunc opcode is passed into it.
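For reference, a minimal sketch of what such a TruncFlagsTy could look like next to the existing WrapFlagsTy (assumed shape; the actual fields are in 0a7258a, which is not shown here):

// Hypothetical shape, mirroring WrapFlagsTy; not copied from 0a7258a.
struct TruncFlagsTy {
  char HasNUW : 1;
  char HasNSW : 1;
  TruncFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
};
// A dedicated constructor can then tag the kind unambiguously:
// VPIRFlags(TruncFlagsTy TruncFlags)
//     : OpType(OperationType::TruncOp), TruncFlags(TruncFlags) {}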
This preserves the nuw/nsw flags on widened truncs by checking for TruncInst in the VPIRFlags constructor.
The motivation for this is to be able to fold away some redundant truncs feeding into uitofps (or potentially narrow the inductions feeding them)
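A sketch of the constructor check the final description refers to (assumed shape based on the dyn_cast chain in VPlan.h shown above, not the verbatim final patch):

// Assumed shape: special-case TruncInst ahead of the binary-operator path.
VPIRFlags::VPIRFlags(Instruction &I) {
  if (auto *Trunc = dyn_cast<TruncInst>(&I)) {
    OpType = OperationType::Trunc; // hypothetical kind name
    TruncFlags = {Trunc->hasNoUnsignedWrap(), Trunc->hasNoSignedWrap()};
  } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
    OpType = OperationType::OverflowingBinOp;
    WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
  }
  // ... remaining flag kinds as before ...
}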