diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index b1c7a2682785b..dc82c92264c07 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -1006,6 +1006,9 @@ bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) { // Only handle supported cast operations switch (CastOpcode) { case Instruction::BitCast: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::Trunc: break; default: return false; diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll b/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll index 761ad80d560e8..6c0ab8b9abaff 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll @@ -45,11 +45,11 @@ define i32 @multiuse(<16 x i32> %u, <16 x i32> %v, ptr %b) { ; CHECK-NEXT: [[U_MASKED:%.*]] = and <16 x i32> [[U:%.*]], splat (i32 255) ; CHECK-NEXT: [[V_MASKED:%.*]] = and <16 x i32> [[V:%.*]], splat (i32 255) ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[B:%.*]], align 1 -; CHECK-NEXT: [[TMP0:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = lshr <16 x i8> [[WIDE_LOAD]], splat (i8 4) ; CHECK-NEXT: [[TMP7:%.*]] = zext <16 x i8> [[TMP6]] to <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP7]], [[V_MASKED]] -; CHECK-NEXT: [[TMP4:%.*]] = and <16 x i32> [[TMP0]], splat (i32 15) +; CHECK-NEXT: [[DOTINNER:%.*]] = and <16 x i8> [[WIDE_LOAD]], splat (i8 15) +; CHECK-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[DOTINNER]] to <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], [[U_MASKED]] ; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <16 x i32> [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP8]]) diff --git a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll index c6253a7b858ad..acf39a258315f 100644 --- a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll +++ b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll @@ -287,9 +287,9 @@ define <2 x i32> @or_bitcast_v4i16_to_v2i32_constant_commuted(<4 x i16> %a) { ; Test bitwise operations with truncate and one constant define <4 x i16> @or_trunc_v4i32_to_v4i16_constant(<4 x i32> %a) { ; CHECK-LABEL: @or_trunc_v4i32_to_v4i16_constant( -; CHECK-NEXT: [[T1:%.*]] = trunc <4 x i32> [[A:%.*]] to <4 x i16> -; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[T1]], -; CHECK-NEXT: ret <4 x i16> [[OR]] +; CHECK-NEXT: [[A:%.*]] = or <4 x i32> [[A1:%.*]], +; CHECK-NEXT: [[T1:%.*]] = trunc <4 x i32> [[A]] to <4 x i16> +; CHECK-NEXT: ret <4 x i16> [[T1]] ; %t1 = trunc <4 x i32> %a to <4 x i16> %or = or <4 x i16> %t1, @@ -299,9 +299,9 @@ define <4 x i16> @or_trunc_v4i32_to_v4i16_constant(<4 x i32> %a) { ; Test bitwise operations with zero extend and one constant define <4 x i32> @or_zext_v4i16_to_v4i32_constant(<4 x i16> %a) { ; CHECK-LABEL: @or_zext_v4i16_to_v4i32_constant( -; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32> -; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], -; CHECK-NEXT: ret <4 x i32> [[OR]] +; CHECK-NEXT: [[A:%.*]] = or <4 x i16> [[A1:%.*]], +; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[A]] to <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[Z1]] ; %z1 = zext <4 x i16> %a to <4 x i32> %or = or <4 x i32> %z1, @@ -322,9 +322,9 @@ define <4 x i32> @or_zext_v4i8_to_v4i32_constant_with_loss(<4 x i8> %a) { ; Test bitwise operations with sign extend and one constant define <4 x i32> @or_sext_v4i8_to_v4i32_positive_constant(<4 x i8> %a) { ; CHECK-LABEL: @or_sext_v4i8_to_v4i32_positive_constant( -; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i32> -; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[S1]], -; CHECK-NEXT: ret <4 x i32> [[OR]] +; CHECK-NEXT: [[A:%.*]] = or <4 x i8> [[A1:%.*]], +; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A]] to <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[S1]] ; %s1 = sext <4 x i8> %a to <4 x i32> %or = or <4 x i32> %s1, @@ -333,9 +333,9 @@ define <4 x i32> @or_sext_v4i8_to_v4i32_positive_constant(<4 x i8> %a) { define <4 x i32> @or_sext_v4i8_to_v4i32_minus_constant(<4 x i8> %a) { ; CHECK-LABEL: @or_sext_v4i8_to_v4i32_minus_constant( -; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i32> -; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[S1]], -; CHECK-NEXT: ret <4 x i32> [[OR]] +; CHECK-NEXT: [[A:%.*]] = or <4 x i8> [[A1:%.*]], +; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A]] to <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[S1]] ; %s1 = sext <4 x i8> %a to <4 x i32> %or = or <4 x i32> %s1, @@ -356,8 +356,8 @@ define <4 x i32> @or_sext_v4i8_to_v4i32_constant_with_loss(<4 x i8> %a) { ; Test truncate with flag preservation and one constant define <4 x i16> @and_trunc_nuw_nsw_constant(<4 x i32> %a) { ; CHECK-LABEL: @and_trunc_nuw_nsw_constant( -; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i16> -; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[T1]], +; CHECK-NEXT: [[AND_INNER:%.*]] = and <4 x i32> [[A:%.*]], +; CHECK-NEXT: [[AND:%.*]] = trunc <4 x i32> [[AND_INNER]] to <4 x i16> ; CHECK-NEXT: ret <4 x i16> [[AND]] ; %t1 = trunc nuw nsw <4 x i32> %a to <4 x i16> @@ -367,8 +367,8 @@ define <4 x i16> @and_trunc_nuw_nsw_constant(<4 x i32> %a) { define <4 x i8> @and_trunc_nuw_nsw_minus_constant(<4 x i32> %a) { ; CHECK-LABEL: @and_trunc_nuw_nsw_minus_constant( -; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i8> -; CHECK-NEXT: [[AND:%.*]] = and <4 x i8> [[T1]], +; CHECK-NEXT: [[AND_INNER:%.*]] = and <4 x i32> [[A:%.*]], +; CHECK-NEXT: [[AND:%.*]] = trunc <4 x i32> [[AND_INNER]] to <4 x i8> ; CHECK-NEXT: ret <4 x i8> [[AND]] ; %t1 = trunc nuw nsw <4 x i32> %a to <4 x i8> @@ -378,8 +378,8 @@ define <4 x i8> @and_trunc_nuw_nsw_minus_constant(<4 x i32> %a) { define <4 x i8> @and_trunc_nuw_nsw_multiconstant(<4 x i32> %a) { ; CHECK-LABEL: @and_trunc_nuw_nsw_multiconstant( -; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i8> -; CHECK-NEXT: [[AND:%.*]] = and <4 x i8> [[T1]], +; CHECK-NEXT: [[AND_INNER:%.*]] = and <4 x i32> [[A:%.*]], +; CHECK-NEXT: [[AND:%.*]] = trunc <4 x i32> [[AND_INNER]] to <4 x i8> ; CHECK-NEXT: ret <4 x i8> [[AND]] ; %t1 = trunc nuw nsw <4 x i32> %a to <4 x i8> @@ -390,8 +390,8 @@ define <4 x i8> @and_trunc_nuw_nsw_multiconstant(<4 x i32> %a) { ; Test sign extend with nneg flag and one constant define <4 x i32> @or_zext_nneg_constant(<4 x i16> %a) { ; CHECK-LABEL: @or_zext_nneg_constant( -; CHECK-NEXT: [[Z1:%.*]] = zext nneg <4 x i16> [[A:%.*]] to <4 x i32> -; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], +; CHECK-NEXT: [[OR_INNER:%.*]] = or <4 x i16> [[A:%.*]], +; CHECK-NEXT: [[OR:%.*]] = zext <4 x i16> [[OR_INNER]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[OR]] ; %z1 = zext nneg <4 x i16> %a to <4 x i32> @@ -401,8 +401,8 @@ define <4 x i32> @or_zext_nneg_constant(<4 x i16> %a) { define <4 x i32> @or_zext_nneg_minus_constant(<4 x i8> %a) { ; CHECK-LABEL: @or_zext_nneg_minus_constant( -; CHECK-NEXT: [[Z1:%.*]] = zext nneg <4 x i8> [[A:%.*]] to <4 x i32> -; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], +; CHECK-NEXT: [[OR_INNER:%.*]] = or <4 x i8> [[A:%.*]], +; CHECK-NEXT: [[OR:%.*]] = zext <4 x i8> [[OR_INNER]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[OR]] ; %z1 = zext nneg <4 x i8> %a to <4 x i32> @@ -412,8 +412,8 @@ define <4 x i32> @or_zext_nneg_minus_constant(<4 x i8> %a) { define <4 x i32> @or_zext_nneg_multiconstant(<4 x i8> %a) { ; CHECK-LABEL: @or_zext_nneg_multiconstant( -; CHECK-NEXT: [[Z1:%.*]] = zext nneg <4 x i8> [[A:%.*]] to <4 x i32> -; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], +; CHECK-NEXT: [[OR_INNER:%.*]] = or <4 x i8> [[A:%.*]], +; CHECK-NEXT: [[OR:%.*]] = zext <4 x i8> [[OR_INNER]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[OR]] ; %z1 = zext nneg <4 x i8> %a to <4 x i32>