-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[VectorCombine] Add Ext and Trunc support in foldBitOpOfCastConstant #157822
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[VectorCombine] Add Ext and Trunc support in foldBitOpOfCastConstant #157822
Conversation
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Hongyu Chen (XChy) ChangesFollow-up of #155216. Full diff: https://github.com/llvm/llvm-project/pull/157822.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index b1c7a2682785b..dc82c92264c07 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1006,6 +1006,9 @@ bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) {
// Only handle supported cast operations
switch (CastOpcode) {
case Instruction::BitCast:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::Trunc:
break;
default:
return false;
diff --git a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
index c6253a7b858ad..acf39a258315f 100644
--- a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll
@@ -287,9 +287,9 @@ define <2 x i32> @or_bitcast_v4i16_to_v2i32_constant_commuted(<4 x i16> %a) {
; Test bitwise operations with truncate and one constant
define <4 x i16> @or_trunc_v4i32_to_v4i16_constant(<4 x i32> %a) {
; CHECK-LABEL: @or_trunc_v4i32_to_v4i16_constant(
-; CHECK-NEXT: [[T1:%.*]] = trunc <4 x i32> [[A:%.*]] to <4 x i16>
-; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[T1]], <i16 1, i16 2, i16 3, i16 4>
-; CHECK-NEXT: ret <4 x i16> [[OR]]
+; CHECK-NEXT: [[A:%.*]] = or <4 x i32> [[A1:%.*]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT: [[T1:%.*]] = trunc <4 x i32> [[A]] to <4 x i16>
+; CHECK-NEXT: ret <4 x i16> [[T1]]
;
%t1 = trunc <4 x i32> %a to <4 x i16>
%or = or <4 x i16> %t1, <i16 1, i16 2, i16 3, i16 4>
@@ -299,9 +299,9 @@ define <4 x i16> @or_trunc_v4i32_to_v4i16_constant(<4 x i32> %a) {
; Test bitwise operations with zero extend and one constant
define <4 x i32> @or_zext_v4i16_to_v4i32_constant(<4 x i16> %a) {
; CHECK-LABEL: @or_zext_v4i16_to_v4i32_constant(
-; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], <i32 1, i32 2, i32 3, i32 4>
-; CHECK-NEXT: ret <4 x i32> [[OR]]
+; CHECK-NEXT: [[A:%.*]] = or <4 x i16> [[A1:%.*]], <i16 1, i16 2, i16 3, i16 4>
+; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[A]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[Z1]]
;
%z1 = zext <4 x i16> %a to <4 x i32>
%or = or <4 x i32> %z1, <i32 1, i32 2, i32 3, i32 4>
@@ -322,9 +322,9 @@ define <4 x i32> @or_zext_v4i8_to_v4i32_constant_with_loss(<4 x i8> %a) {
; Test bitwise operations with sign extend and one constant
define <4 x i32> @or_sext_v4i8_to_v4i32_positive_constant(<4 x i8> %a) {
; CHECK-LABEL: @or_sext_v4i8_to_v4i32_positive_constant(
-; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[S1]], <i32 1, i32 2, i32 3, i32 4>
-; CHECK-NEXT: ret <4 x i32> [[OR]]
+; CHECK-NEXT: [[A:%.*]] = or <4 x i8> [[A1:%.*]], <i8 1, i8 2, i8 3, i8 4>
+; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[S1]]
;
%s1 = sext <4 x i8> %a to <4 x i32>
%or = or <4 x i32> %s1, <i32 1, i32 2, i32 3, i32 4>
@@ -333,9 +333,9 @@ define <4 x i32> @or_sext_v4i8_to_v4i32_positive_constant(<4 x i8> %a) {
define <4 x i32> @or_sext_v4i8_to_v4i32_minus_constant(<4 x i8> %a) {
; CHECK-LABEL: @or_sext_v4i8_to_v4i32_minus_constant(
-; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[S1]], <i32 -1, i32 -2, i32 -3, i32 -4>
-; CHECK-NEXT: ret <4 x i32> [[OR]]
+; CHECK-NEXT: [[A:%.*]] = or <4 x i8> [[A1:%.*]], <i8 -1, i8 -2, i8 -3, i8 -4>
+; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[S1]]
;
%s1 = sext <4 x i8> %a to <4 x i32>
%or = or <4 x i32> %s1, <i32 -1, i32 -2, i32 -3, i32 -4>
@@ -356,8 +356,8 @@ define <4 x i32> @or_sext_v4i8_to_v4i32_constant_with_loss(<4 x i8> %a) {
; Test truncate with flag preservation and one constant
define <4 x i16> @and_trunc_nuw_nsw_constant(<4 x i32> %a) {
; CHECK-LABEL: @and_trunc_nuw_nsw_constant(
-; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i16>
-; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[T1]], <i16 1, i16 2, i16 3, i16 4>
+; CHECK-NEXT: [[AND_INNER:%.*]] = and <4 x i32> [[A:%.*]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT: [[AND:%.*]] = trunc <4 x i32> [[AND_INNER]] to <4 x i16>
; CHECK-NEXT: ret <4 x i16> [[AND]]
;
%t1 = trunc nuw nsw <4 x i32> %a to <4 x i16>
@@ -367,8 +367,8 @@ define <4 x i16> @and_trunc_nuw_nsw_constant(<4 x i32> %a) {
define <4 x i8> @and_trunc_nuw_nsw_minus_constant(<4 x i32> %a) {
; CHECK-LABEL: @and_trunc_nuw_nsw_minus_constant(
-; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i8>
-; CHECK-NEXT: [[AND:%.*]] = and <4 x i8> [[T1]], <i8 -16, i8 -15, i8 -14, i8 -13>
+; CHECK-NEXT: [[AND_INNER:%.*]] = and <4 x i32> [[A:%.*]], <i32 240, i32 241, i32 242, i32 243>
+; CHECK-NEXT: [[AND:%.*]] = trunc <4 x i32> [[AND_INNER]] to <4 x i8>
; CHECK-NEXT: ret <4 x i8> [[AND]]
;
%t1 = trunc nuw nsw <4 x i32> %a to <4 x i8>
@@ -378,8 +378,8 @@ define <4 x i8> @and_trunc_nuw_nsw_minus_constant(<4 x i32> %a) {
define <4 x i8> @and_trunc_nuw_nsw_multiconstant(<4 x i32> %a) {
; CHECK-LABEL: @and_trunc_nuw_nsw_multiconstant(
-; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i8>
-; CHECK-NEXT: [[AND:%.*]] = and <4 x i8> [[T1]], <i8 -16, i8 1, i8 -14, i8 3>
+; CHECK-NEXT: [[AND_INNER:%.*]] = and <4 x i32> [[A:%.*]], <i32 240, i32 1, i32 242, i32 3>
+; CHECK-NEXT: [[AND:%.*]] = trunc <4 x i32> [[AND_INNER]] to <4 x i8>
; CHECK-NEXT: ret <4 x i8> [[AND]]
;
%t1 = trunc nuw nsw <4 x i32> %a to <4 x i8>
@@ -390,8 +390,8 @@ define <4 x i8> @and_trunc_nuw_nsw_multiconstant(<4 x i32> %a) {
; Test sign extend with nneg flag and one constant
define <4 x i32> @or_zext_nneg_constant(<4 x i16> %a) {
; CHECK-LABEL: @or_zext_nneg_constant(
-; CHECK-NEXT: [[Z1:%.*]] = zext nneg <4 x i16> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT: [[OR_INNER:%.*]] = or <4 x i16> [[A:%.*]], <i16 1, i16 2, i16 3, i16 4>
+; CHECK-NEXT: [[OR:%.*]] = zext <4 x i16> [[OR_INNER]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[OR]]
;
%z1 = zext nneg <4 x i16> %a to <4 x i32>
@@ -401,8 +401,8 @@ define <4 x i32> @or_zext_nneg_constant(<4 x i16> %a) {
define <4 x i32> @or_zext_nneg_minus_constant(<4 x i8> %a) {
; CHECK-LABEL: @or_zext_nneg_minus_constant(
-; CHECK-NEXT: [[Z1:%.*]] = zext nneg <4 x i8> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], <i32 240, i32 241, i32 242, i32 243>
+; CHECK-NEXT: [[OR_INNER:%.*]] = or <4 x i8> [[A:%.*]], <i8 -16, i8 -15, i8 -14, i8 -13>
+; CHECK-NEXT: [[OR:%.*]] = zext <4 x i8> [[OR_INNER]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[OR]]
;
%z1 = zext nneg <4 x i8> %a to <4 x i32>
@@ -412,8 +412,8 @@ define <4 x i32> @or_zext_nneg_minus_constant(<4 x i8> %a) {
define <4 x i32> @or_zext_nneg_multiconstant(<4 x i8> %a) {
; CHECK-LABEL: @or_zext_nneg_multiconstant(
-; CHECK-NEXT: [[Z1:%.*]] = zext nneg <4 x i8> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], <i32 240, i32 1, i32 242, i32 3>
+; CHECK-NEXT: [[OR_INNER:%.*]] = or <4 x i8> [[A:%.*]], <i8 -16, i8 1, i8 -14, i8 3>
+; CHECK-NEXT: [[OR:%.*]] = zext <4 x i8> [[OR_INNER]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[OR]]
;
%z1 = zext nneg <4 x i8> %a to <4 x i32>
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We could use splat for cases like these for better visibility:
%u.masked = and <16 x i32> %u, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
@Rajveer100. Do you mean replacing terms like |
Not quite, this would change the test case itself, I meant replacing the ones where they all are equal not affecting the actual values. |
Oh, you mean updating |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Follow-up of #155216.
This patch doesn't preserve the flags. I will implement it in the follow-up patch.