Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SLP][REVEC] getWidenedType should be used instead of FixedVectorType::get. #109843

Merged
merged 2 commits into llvm:main from slp-revec-fix-109835
Sep 24, 2024

Conversation

HanKuanChen
Copy link
Contributor

reference: #109835

@llvmbot
Copy link
Member

llvmbot commented Sep 24, 2024

@llvm/pr-subscribers-llvm-transforms

Author: Han-Kuan Chen (HanKuanChen)

Changes

reference: #109835


Full diff: https://github.com/llvm/llvm-project/pull/109843.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+2-2)
  • (added) llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll (+70)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 7e3dbe6260983e..b79e964cdb1b6b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9986,8 +9986,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
         }
         Cost += ::getShuffleCost(
             TTI, TTI::SK_InsertSubvector,
-            FixedVectorType::get(ScalarTy, CommonMask.size()), {}, CostKind,
-            Idx, FixedVectorType::get(ScalarTy, E->getVectorFactor()));
+            getWidenedType(ScalarTy, CommonMask.size()), {}, CostKind, Idx,
+            getWidenedType(ScalarTy, E->getVectorFactor()));
         if (!CommonMask.empty()) {
           std::iota(std::next(CommonMask.begin(), Idx),
                     std::next(CommonMask.begin(), Idx + E->getVectorFactor()),
diff --git a/llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll b/llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll
new file mode 100644
index 00000000000000..965bfc7074c638
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
+
+@b = external dso_local local_unnamed_addr global i64, align 8
+@d = external dso_local local_unnamed_addr global i32, align 4
+@c = external dso_local local_unnamed_addr global i32, align 4
+@a = external dso_local local_unnamed_addr global i8, align 2
+
+define void @e() {
+; CHECK-LABEL: @e(
+; CHECK-NEXT:  vector.ph:
+; CHECK-NEXT:    [[C_PROMOTED5:%.*]] = load i32, ptr @c, align 4
+; CHECK-NEXT:    [[A_PROMOTED7:%.*]] = load i8, ptr @a, align 2
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[C_PROMOTED5]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <16 x i8> <i8 poison, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, i8 [[A_PROMOTED7]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = add <16 x i32> [[DOTSPLAT]], <i32 -6, i32 3, i32 12, i32 21, i32 30, i32 39, i32 48, i32 57, i32 66, i32 75, i32 84, i32 93, i32 102, i32 111, i32 120, i32 129>
+; CHECK-NEXT:    [[TMP2:%.*]] = add <16 x i32> [[DOTSPLAT]], <i32 -4, i32 5, i32 14, i32 23, i32 32, i32 41, i32 50, i32 59, i32 68, i32 77, i32 86, i32 95, i32 104, i32 113, i32 122, i32 131>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <16 x i32> [[DOTSPLAT]], <i32 -2, i32 7, i32 16, i32 25, i32 34, i32 43, i32 52, i32 61, i32 70, i32 79, i32 88, i32 97, i32 106, i32 115, i32 124, i32 133>
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <16 x i32> [[DOTSPLAT]], <i32 0, i32 9, i32 18, i32 27, i32 36, i32 45, i32 54, i32 63, i32 72, i32 81, i32 90, i32 99, i32 108, i32 117, i32 126, i32 135>
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult <16 x i32> [[TMP1]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult <16 x i32> [[TMP2]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult <16 x i32> [[TMP3]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult <16 x i32> [[INDUCTION]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq <16 x i32> [[DOTSPLAT]], <i32 -1, i32 -10, i32 -19, i32 -28, i32 -37, i32 -46, i32 -55, i32 -64, i32 -73, i32 -82, i32 -91, i32 -100, i32 -109, i32 -118, i32 -127, i32 -136>
+; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i1> [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[TMP10:%.*]] = or <16 x i1> [[TMP9]], [[TMP6]]
+; CHECK-NEXT:    [[TMP11:%.*]] = or <16 x i1> [[TMP10]], [[TMP7]]
+; CHECK-NEXT:    [[TMP12:%.*]] = or <16 x i1> [[TMP11]], [[TMP8]]
+; CHECK-NEXT:    [[TMP13:%.*]] = zext <16 x i1> [[TMP12]] to <16 x i8>
+; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i8> [[TMP0]], [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP16:%.*]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP15]])
+; CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[C_PROMOTED5]], 81
+; CHECK-NEXT:    store i64 -1, ptr @b, align 8
+; CHECK-NEXT:    store i32 9, ptr @d, align 4
+; CHECK-NEXT:    store i32 [[TMP17]], ptr @c, align 4
+; CHECK-NEXT:    store i8 [[TMP16]], ptr @a, align 2
+; CHECK-NEXT:    ret void
+;
+vector.ph:
+  %c.promoted5 = load i32, ptr @c, align 4
+  %a.promoted7 = load i8, ptr @a, align 2
+  %.splatinsert = insertelement <16 x i32> poison, i32 %c.promoted5, i64 0
+  %.splat = shufflevector <16 x i32> %.splatinsert, <16 x i32> poison, <16 x i32> zeroinitializer
+  %0 = insertelement <16 x i8> <i8 poison, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, i8 %a.promoted7, i64 0
+  %1 = add <16 x i32> %.splat, <i32 -6, i32 3, i32 12, i32 21, i32 30, i32 39, i32 48, i32 57, i32 66, i32 75, i32 84, i32 93, i32 102, i32 111, i32 120, i32 129>
+  %2 = add <16 x i32> %.splat, <i32 -4, i32 5, i32 14, i32 23, i32 32, i32 41, i32 50, i32 59, i32 68, i32 77, i32 86, i32 95, i32 104, i32 113, i32 122, i32 131>
+  %3 = add <16 x i32> %.splat, <i32 -2, i32 7, i32 16, i32 25, i32 34, i32 43, i32 52, i32 61, i32 70, i32 79, i32 88, i32 97, i32 106, i32 115, i32 124, i32 133>
+  %induction = add <16 x i32> %.splat, <i32 0, i32 9, i32 18, i32 27, i32 36, i32 45, i32 54, i32 63, i32 72, i32 81, i32 90, i32 99, i32 108, i32 117, i32 126, i32 135>
+  %4 = icmp ult <16 x i32> %1, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  %5 = icmp ult <16 x i32> %2, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  %6 = icmp ult <16 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  %7 = icmp ult <16 x i32> %induction, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  %8 = icmp eq <16 x i32> %.splat, <i32 -1, i32 -10, i32 -19, i32 -28, i32 -37, i32 -46, i32 -55, i32 -64, i32 -73, i32 -82, i32 -91, i32 -100, i32 -109, i32 -118, i32 -127, i32 -136>
+  %9 = or <16 x i1> %4, %5
+  %10 = or <16 x i1> %9, %6
+  %11 = or <16 x i1> %10, %7
+  %12 = or <16 x i1> %11, %8
+  %13 = zext <16 x i1> %12 to <16 x i8>
+  %14 = or <16 x i8> %0, %13
+  %15 = shufflevector <16 x i8> %14, <16 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  %16 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %15)
+  %17 = add i32 %c.promoted5, 81
+  store i64 -1, ptr @b, align 8
+  store i32 9, ptr @d, align 4
+  store i32 %17, ptr @c, align 4
+  store i8 %16, ptr @a, align 2
+  ret void
+}

Copy link
Member

@alexey-bataev alexey-bataev left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LG

@HanKuanChen HanKuanChen merged commit 6d3d5f3 into llvm:main Sep 24, 2024
8 of 10 checks passed
@HanKuanChen HanKuanChen deleted the slp-revec-fix-109835 branch September 24, 2024 18:23
augusto2112 pushed a commit to augusto2112/llvm-project that referenced this pull request Sep 26, 2024
xgupta pushed a commit to xgupta/llvm-project that referenced this pull request Oct 4, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants