From df5a69313f1ae3688a58a93f3f3f7bec3b99d5be Mon Sep 17 00:00:00 2001 From: XChy Date: Mon, 25 Aug 2025 01:19:07 +0800 Subject: [PATCH 1/3] [VectorCombine] NFC. Add tests for 154797 --- .../VectorCombine/X86/bitop-of-castops.ll | 160 ++++++++++++++++++ 1 file changed, 160 insertions(+) diff --git a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll index 220556c8c38c3..b7c9e35ecc6f1 100644 --- a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll +++ b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll @@ -260,3 +260,163 @@ define <4 x i32> @or_zext_nneg(<4 x i16> %a, <4 x i16> %b) { %or = or <4 x i32> %z1, %z2 ret <4 x i32> %or } + +; Test bitwise operations with integer-to-integer bitcast with one constant +define <2 x i32> @or_bitcast_v4i16_to_v2i32_constant(<4 x i16> %a) { +; CHECK-LABEL: @or_bitcast_v4i16_to_v2i32_constant( +; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i16> [[A:%.*]] to <2 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[BC1]], +; CHECK-NEXT: ret <2 x i32> [[OR]] +; + %bc1 = bitcast <4 x i16> %a to <2 x i32> + %or = or <2 x i32> %bc1, + ret <2 x i32> %or +} + +define <2 x i32> @or_bitcast_v4i16_to_v2i32_constant_commuted(<4 x i16> %a) { +; CHECK-LABEL: @or_bitcast_v4i16_to_v2i32_constant_commuted( +; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i16> [[A:%.*]] to <2 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> , [[BC1]] +; CHECK-NEXT: ret <2 x i32> [[OR]] +; + %bc1 = bitcast <4 x i16> %a to <2 x i32> + %or = or <2 x i32> , %bc1 + ret <2 x i32> %or +} + +; Test bitwise operations with truncate and one constant +define <4 x i16> @or_trunc_v4i32_to_v4i16_constant(<4 x i32> %a) { +; CHECK-LABEL: @or_trunc_v4i32_to_v4i16_constant( +; CHECK-NEXT: [[T1:%.*]] = trunc <4 x i32> [[A:%.*]] to <4 x i16> +; CHECK-NEXT: [[AND:%.*]] = or <4 x i16> [[T1]], +; CHECK-NEXT: ret <4 x i16> [[AND]] +; + %t1 = trunc <4 x i32> %a to <4 x i16> + %or = or <4 x i16> %t1, + ret <4 x i16> %or +} + +; Test bitwise operations with zero extend and one constant +define <4 x i32> @or_zext_v4i16_to_v4i32_constant(<4 x i16> %a) { +; CHECK-LABEL: @or_zext_v4i16_to_v4i32_constant( +; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32> +; CHECK-NEXT: [[AND:%.*]] = or <4 x i32> [[Z1]], +; CHECK-NEXT: ret <4 x i32> [[AND]] +; + %z1 = zext <4 x i16> %a to <4 x i32> + %or = or <4 x i32> %z1, + ret <4 x i32> %or +} + +define <4 x i32> @or_zext_v4i8_to_v4i32_constant_with_loss(<4 x i8> %a) { +; CHECK-LABEL: @or_zext_v4i8_to_v4i32_constant_with_loss( +; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i8> [[A:%.*]] to <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], +; CHECK-NEXT: ret <4 x i32> [[OR]] +; + %z1 = zext <4 x i8> %a to <4 x i32> + %or = or <4 x i32> %z1, + ret <4 x i32> %or +} + +; Test bitwise operations with sign extend and one constant +define <4 x i32> @or_sext_v4i8_to_v4i32_positive_constant(<4 x i8> %a) { +; CHECK-LABEL: @or_sext_v4i8_to_v4i32_positive_constant( +; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[S1]], +; CHECK-NEXT: ret <4 x i32> [[OR]] +; + %s1 = sext <4 x i8> %a to <4 x i32> + %or = or <4 x i32> %s1, + ret <4 x i32> %or +} + +define <4 x i32> @or_sext_v4i8_to_v4i32_minus_constant(<4 x i8> %a) { +; CHECK-LABEL: @or_sext_v4i8_to_v4i32_minus_constant( +; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[S1]], +; CHECK-NEXT: ret <4 x i32> [[OR]] +; + %s1 = sext <4 x i8> %a to <4 x i32> + %or = or <4 x i32> %s1, + ret <4 x i32> %or +} + +define <4 x i32> @or_sext_v4i8_to_v4i32_constant_with_loss(<4 x i8> %a) { +; CHECK-LABEL: @or_sext_v4i8_to_v4i32_constant_with_loss( +; CHECK-NEXT: [[Z1:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], +; CHECK-NEXT: ret <4 x i32> [[OR]] +; + %z1 = sext <4 x i8> %a to <4 x i32> + %or = or <4 x i32> %z1, + ret <4 x i32> %or +} + +; Test truncate with flag preservation and one constant +define <4 x i16> @and_trunc_nuw_nsw_constant(<4 x i32> %a) { +; CHECK-LABEL: @and_trunc_nuw_nsw_constant( +; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i16> +; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[T1]], +; CHECK-NEXT: ret <4 x i16> [[AND]] +; + %t1 = trunc nuw nsw <4 x i32> %a to <4 x i16> + %and = and <4 x i16> %t1, + ret <4 x i16> %and +} + +define <4 x i8> @and_trunc_nuw_nsw_minus_constant(<4 x i32> %a) { +; CHECK-LABEL: @and_trunc_nuw_nsw_minus_constant( +; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i8> +; CHECK-NEXT: [[AND:%.*]] = and <4 x i8> [[T1]], +; CHECK-NEXT: ret <4 x i8> [[AND]] +; + %t1 = trunc nuw nsw <4 x i32> %a to <4 x i8> + %and = and <4 x i8> %t1, + ret <4 x i8> %and +} + +define <4 x i8> @and_trunc_nuw_nsw_multiconstant(<4 x i32> %a) { +; CHECK-LABEL: @and_trunc_nuw_nsw_multiconstant( +; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i8> +; CHECK-NEXT: [[AND:%.*]] = and <4 x i8> [[T1]], +; CHECK-NEXT: ret <4 x i8> [[AND]] +; + %t1 = trunc nuw nsw <4 x i32> %a to <4 x i8> + %and = and <4 x i8> %t1, + ret <4 x i8> %and +} + +; Test sign extend with nneg flag and one constant +define <4 x i32> @or_zext_nneg_constant(<4 x i16> %a) { +; CHECK-LABEL: @or_zext_nneg_constant( +; CHECK-NEXT: [[Z1:%.*]] = zext nneg <4 x i16> [[A:%.*]] to <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], +; CHECK-NEXT: ret <4 x i32> [[OR]] +; + %z1 = zext nneg <4 x i16> %a to <4 x i32> + %or = or <4 x i32> %z1, + ret <4 x i32> %or +} + +define <4 x i32> @or_zext_nneg_minus_constant(<4 x i8> %a) { +; CHECK-LABEL: @or_zext_nneg_minus_constant( +; CHECK-NEXT: [[Z1:%.*]] = zext nneg <4 x i8> [[A:%.*]] to <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], +; CHECK-NEXT: ret <4 x i32> [[OR]] +; + %z1 = zext nneg <4 x i8> %a to <4 x i32> + %or = or <4 x i32> %z1, + ret <4 x i32> %or +} + +define <4 x i32> @or_zext_nneg_multiconstant(<4 x i8> %a) { +; CHECK-LABEL: @or_zext_nneg_multiconstant( +; CHECK-NEXT: [[Z1:%.*]] = zext nneg <4 x i8> [[A:%.*]] to <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], +; CHECK-NEXT: ret <4 x i32> [[OR]] +; + %z1 = zext nneg <4 x i8> %a to <4 x i32> + %or = or <4 x i32> %z1, + ret <4 x i32> %or +} From c363fc2ecfbaed13125c12de685c8a5f90c522b7 Mon Sep 17 00:00:00 2001 From: XChy Date: Mon, 25 Aug 2025 15:04:16 +0800 Subject: [PATCH 2/3] [VectorCombine] Support pattern bitop(cast(x), C) -> bitop(cast(x), cast(InvC)) --- .../Transforms/Vectorize/VectorCombine.cpp | 156 ++++++++++++++++++ .../VectorCombine/AArch64/shrink-types.ll | 4 +- .../VectorCombine/X86/bitop-of-castops.ll | 64 +++---- 3 files changed, 190 insertions(+), 34 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index c88ed95de2946..57f4170b06a53 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -122,6 +122,7 @@ class VectorCombine { bool foldInsExtBinop(Instruction &I); bool foldInsExtVectorToShuffle(Instruction &I); bool foldBitOpOfCastops(Instruction &I); + bool foldBitOpOfCastConstant(Instruction &I); bool foldBitcastShuffle(Instruction &I); bool scalarizeOpOrCmp(Instruction &I); bool scalarizeVPIntrinsic(Instruction &I); @@ -937,6 +938,159 @@ bool VectorCombine::foldBitOpOfCastops(Instruction &I) { return true; } +struct PreservedCastFlags { + bool NNeg = false; + bool NUW = false; + bool NSW = false; +}; + +// Try to cast C to InvC losslessly, satisfying CastOp(InvC) == C. +// Will try best to preserve the flags. +static Constant *getLosslessInvCast(Constant *C, Type *InvCastTo, + Instruction::CastOps CastOp, + const DataLayout &DL, + PreservedCastFlags &Flags) { + switch (CastOp) { + case Instruction::BitCast: + // Bitcast is always lossless. + return ConstantFoldCastOperand(Instruction::BitCast, C, InvCastTo, DL); + case Instruction::Trunc: { + auto *ZExtC = ConstantFoldCastOperand(Instruction::ZExt, C, InvCastTo, DL); + auto *SExtC = ConstantFoldCastOperand(Instruction::SExt, C, InvCastTo, DL); + // Truncation back on ZExt value is always NUW. + Flags.NUW = true; + // Test positivity of C. + Flags.NSW = ZExtC == SExtC; + return ZExtC; + } + case Instruction::SExt: + case Instruction::ZExt: { + auto *InvC = ConstantExpr::getTrunc(C, InvCastTo); + auto *CastInvC = ConstantFoldCastOperand(CastOp, InvC, C->getType(), DL); + // Must satisfy CastOp(InvC) == C. + if (!CastInvC || CastInvC != C) + return nullptr; + if (CastOp == Instruction::ZExt) { + auto *SExtInvC = + ConstantFoldCastOperand(Instruction::SExt, InvC, C->getType(), DL); + // Test positivity of InvC. + Flags.NNeg = CastInvC == SExtInvC; + } + return InvC; + } + default: + return nullptr; + } +} + +/// Match: +// bitop(castop(x), C) -> +// bitop(castop(x), castop(InvC)) -> +// castop(bitop(x, InvC)) +// Supports: bitcast, trunc, sext, zext +bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) { + Instruction *LHS; + Constant *C; + + // Check if this is a bitwise logic operation + if (!match(&I, m_c_BitwiseLogic(m_Instruction(LHS), m_Constant(C)))) + return false; + + // Get the cast instructions + auto *LHSCast = dyn_cast(LHS); + if (!LHSCast) + return false; + + Instruction::CastOps CastOpcode = LHSCast->getOpcode(); + + // Only handle supported cast operations + switch (CastOpcode) { + case Instruction::BitCast: + case Instruction::Trunc: + case Instruction::SExt: + case Instruction::ZExt: + break; + default: + return false; + } + + Value *LHSSrc = LHSCast->getOperand(0); + + // Only handle vector types with integer elements + auto *SrcVecTy = dyn_cast(LHSSrc->getType()); + auto *DstVecTy = dyn_cast(I.getType()); + if (!SrcVecTy || !DstVecTy) + return false; + + if (!SrcVecTy->getScalarType()->isIntegerTy() || + !DstVecTy->getScalarType()->isIntegerTy()) + return false; + + // Find the constant InvC, such that castop(InvC) equals to C. + PreservedCastFlags RHSFlags; + Constant *InvC = getLosslessInvCast(C, SrcVecTy, CastOpcode, *DL, RHSFlags); + if (!InvC) + return false; + + // Cost Check : + // OldCost = bitlogic + cast + // NewCost = bitlogic + cast + + // Calculate specific costs for each cast with instruction context + InstructionCost LHSCastCost = + TTI.getCastInstrCost(CastOpcode, DstVecTy, SrcVecTy, + TTI::CastContextHint::None, CostKind, LHSCast); + + InstructionCost OldCost = + TTI.getArithmeticInstrCost(I.getOpcode(), DstVecTy, CostKind) + + LHSCastCost; + + // For new cost, we can't provide an instruction (it doesn't exist yet) + InstructionCost GenericCastCost = TTI.getCastInstrCost( + CastOpcode, DstVecTy, SrcVecTy, TTI::CastContextHint::None, CostKind); + + InstructionCost NewCost = + TTI.getArithmeticInstrCost(I.getOpcode(), SrcVecTy, CostKind) + + GenericCastCost; + + // Account for multi-use casts using specific costs + if (!LHSCast->hasOneUse()) + NewCost += LHSCastCost; + + LLVM_DEBUG(dbgs() << "foldBitOpOfCastConstant: OldCost=" << OldCost + << " NewCost=" << NewCost << "\n"); + + if (NewCost > OldCost) + return false; + + // Create the operation on the source type + Value *NewOp = Builder.CreateBinOp((Instruction::BinaryOps)I.getOpcode(), + LHSSrc, InvC, I.getName() + ".inner"); + if (auto *NewBinOp = dyn_cast(NewOp)) + NewBinOp->copyIRFlags(&I); + + Worklist.pushValue(NewOp); + + // Create the cast operation directly to ensure we get a new instruction + Instruction *NewCast = CastInst::Create(CastOpcode, NewOp, I.getType()); + + // Preserve cast instruction flags + if (RHSFlags.NNeg) + NewCast->setNonNeg(); + if (RHSFlags.NSW) + NewCast->setHasNoSignedWrap(); + if (RHSFlags.NUW) + NewCast->setHasNoUnsignedWrap(); + + NewCast->andIRFlags(LHSCast); + + // Insert the new instruction + Value *Result = Builder.Insert(NewCast); + + replaceValue(I, *Result); + return true; +} + /// If this is a bitcast of a shuffle, try to bitcast the source vector to the /// destination type followed by shuffle. This can enable further transforms by /// moving bitcasts or shuffles together. @@ -4474,6 +4628,8 @@ bool VectorCombine::run() { case Instruction::Xor: if (foldBitOpOfCastops(I)) return true; + if (foldBitOpOfCastConstant(I)) + return true; break; case Instruction::PHI: if (shrinkPhiOfShuffles(I)) diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll b/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll index 761ad80d560e8..6c0ab8b9abaff 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll @@ -45,11 +45,11 @@ define i32 @multiuse(<16 x i32> %u, <16 x i32> %v, ptr %b) { ; CHECK-NEXT: [[U_MASKED:%.*]] = and <16 x i32> [[U:%.*]], splat (i32 255) ; CHECK-NEXT: [[V_MASKED:%.*]] = and <16 x i32> [[V:%.*]], splat (i32 255) ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[B:%.*]], align 1 -; CHECK-NEXT: [[TMP0:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = lshr <16 x i8> [[WIDE_LOAD]], splat (i8 4) ; CHECK-NEXT: [[TMP7:%.*]] = zext <16 x i8> [[TMP6]] to <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP7]], [[V_MASKED]] -; CHECK-NEXT: [[TMP4:%.*]] = and <16 x i32> [[TMP0]], splat (i32 15) +; CHECK-NEXT: [[DOTINNER:%.*]] = and <16 x i8> [[WIDE_LOAD]], splat (i8 15) +; CHECK-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[DOTINNER]] to <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], [[U_MASKED]] ; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <16 x i32> [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP8]]) diff --git a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll index b7c9e35ecc6f1..cd77818a2f9b6 100644 --- a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll +++ b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll @@ -264,9 +264,9 @@ define <4 x i32> @or_zext_nneg(<4 x i16> %a, <4 x i16> %b) { ; Test bitwise operations with integer-to-integer bitcast with one constant define <2 x i32> @or_bitcast_v4i16_to_v2i32_constant(<4 x i16> %a) { ; CHECK-LABEL: @or_bitcast_v4i16_to_v2i32_constant( -; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i16> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[BC1]], -; CHECK-NEXT: ret <2 x i32> [[OR]] +; CHECK-NEXT: [[A:%.*]] = or <4 x i16> [[A1:%.*]], +; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[BC1]] ; %bc1 = bitcast <4 x i16> %a to <2 x i32> %or = or <2 x i32> %bc1, @@ -275,9 +275,9 @@ define <2 x i32> @or_bitcast_v4i16_to_v2i32_constant(<4 x i16> %a) { define <2 x i32> @or_bitcast_v4i16_to_v2i32_constant_commuted(<4 x i16> %a) { ; CHECK-LABEL: @or_bitcast_v4i16_to_v2i32_constant_commuted( -; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i16> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> , [[BC1]] -; CHECK-NEXT: ret <2 x i32> [[OR]] +; CHECK-NEXT: [[A:%.*]] = or <4 x i16> [[A1:%.*]], +; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i16> [[A]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[BC1]] ; %bc1 = bitcast <4 x i16> %a to <2 x i32> %or = or <2 x i32> , %bc1 @@ -287,9 +287,9 @@ define <2 x i32> @or_bitcast_v4i16_to_v2i32_constant_commuted(<4 x i16> %a) { ; Test bitwise operations with truncate and one constant define <4 x i16> @or_trunc_v4i32_to_v4i16_constant(<4 x i32> %a) { ; CHECK-LABEL: @or_trunc_v4i32_to_v4i16_constant( -; CHECK-NEXT: [[T1:%.*]] = trunc <4 x i32> [[A:%.*]] to <4 x i16> -; CHECK-NEXT: [[AND:%.*]] = or <4 x i16> [[T1]], -; CHECK-NEXT: ret <4 x i16> [[AND]] +; CHECK-NEXT: [[A:%.*]] = or <4 x i32> [[A1:%.*]], +; CHECK-NEXT: [[T1:%.*]] = trunc <4 x i32> [[A]] to <4 x i16> +; CHECK-NEXT: ret <4 x i16> [[T1]] ; %t1 = trunc <4 x i32> %a to <4 x i16> %or = or <4 x i16> %t1, @@ -299,9 +299,9 @@ define <4 x i16> @or_trunc_v4i32_to_v4i16_constant(<4 x i32> %a) { ; Test bitwise operations with zero extend and one constant define <4 x i32> @or_zext_v4i16_to_v4i32_constant(<4 x i16> %a) { ; CHECK-LABEL: @or_zext_v4i16_to_v4i32_constant( -; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32> -; CHECK-NEXT: [[AND:%.*]] = or <4 x i32> [[Z1]], -; CHECK-NEXT: ret <4 x i32> [[AND]] +; CHECK-NEXT: [[A:%.*]] = or <4 x i16> [[A1:%.*]], +; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[A]] to <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[Z1]] ; %z1 = zext <4 x i16> %a to <4 x i32> %or = or <4 x i32> %z1, @@ -322,9 +322,9 @@ define <4 x i32> @or_zext_v4i8_to_v4i32_constant_with_loss(<4 x i8> %a) { ; Test bitwise operations with sign extend and one constant define <4 x i32> @or_sext_v4i8_to_v4i32_positive_constant(<4 x i8> %a) { ; CHECK-LABEL: @or_sext_v4i8_to_v4i32_positive_constant( -; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i32> -; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[S1]], -; CHECK-NEXT: ret <4 x i32> [[OR]] +; CHECK-NEXT: [[A:%.*]] = or <4 x i8> [[A1:%.*]], +; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A]] to <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[S1]] ; %s1 = sext <4 x i8> %a to <4 x i32> %or = or <4 x i32> %s1, @@ -333,9 +333,9 @@ define <4 x i32> @or_sext_v4i8_to_v4i32_positive_constant(<4 x i8> %a) { define <4 x i32> @or_sext_v4i8_to_v4i32_minus_constant(<4 x i8> %a) { ; CHECK-LABEL: @or_sext_v4i8_to_v4i32_minus_constant( -; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i32> -; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[S1]], -; CHECK-NEXT: ret <4 x i32> [[OR]] +; CHECK-NEXT: [[A:%.*]] = or <4 x i8> [[A1:%.*]], +; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A]] to <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[S1]] ; %s1 = sext <4 x i8> %a to <4 x i32> %or = or <4 x i32> %s1, @@ -356,9 +356,9 @@ define <4 x i32> @or_sext_v4i8_to_v4i32_constant_with_loss(<4 x i8> %a) { ; Test truncate with flag preservation and one constant define <4 x i16> @and_trunc_nuw_nsw_constant(<4 x i32> %a) { ; CHECK-LABEL: @and_trunc_nuw_nsw_constant( -; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i16> -; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[T1]], -; CHECK-NEXT: ret <4 x i16> [[AND]] +; CHECK-NEXT: [[A:%.*]] = and <4 x i32> [[A1:%.*]], +; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A]] to <4 x i16> +; CHECK-NEXT: ret <4 x i16> [[T1]] ; %t1 = trunc nuw nsw <4 x i32> %a to <4 x i16> %and = and <4 x i16> %t1, @@ -367,8 +367,8 @@ define <4 x i16> @and_trunc_nuw_nsw_constant(<4 x i32> %a) { define <4 x i8> @and_trunc_nuw_nsw_minus_constant(<4 x i32> %a) { ; CHECK-LABEL: @and_trunc_nuw_nsw_minus_constant( -; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i8> -; CHECK-NEXT: [[AND:%.*]] = and <4 x i8> [[T1]], +; CHECK-NEXT: [[AND_INNER:%.*]] = and <4 x i32> [[A:%.*]], +; CHECK-NEXT: [[AND:%.*]] = trunc nuw <4 x i32> [[AND_INNER]] to <4 x i8> ; CHECK-NEXT: ret <4 x i8> [[AND]] ; %t1 = trunc nuw nsw <4 x i32> %a to <4 x i8> @@ -378,8 +378,8 @@ define <4 x i8> @and_trunc_nuw_nsw_minus_constant(<4 x i32> %a) { define <4 x i8> @and_trunc_nuw_nsw_multiconstant(<4 x i32> %a) { ; CHECK-LABEL: @and_trunc_nuw_nsw_multiconstant( -; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i8> -; CHECK-NEXT: [[AND:%.*]] = and <4 x i8> [[T1]], +; CHECK-NEXT: [[AND_INNER:%.*]] = and <4 x i32> [[A:%.*]], +; CHECK-NEXT: [[AND:%.*]] = trunc nuw <4 x i32> [[AND_INNER]] to <4 x i8> ; CHECK-NEXT: ret <4 x i8> [[AND]] ; %t1 = trunc nuw nsw <4 x i32> %a to <4 x i8> @@ -390,9 +390,9 @@ define <4 x i8> @and_trunc_nuw_nsw_multiconstant(<4 x i32> %a) { ; Test sign extend with nneg flag and one constant define <4 x i32> @or_zext_nneg_constant(<4 x i16> %a) { ; CHECK-LABEL: @or_zext_nneg_constant( -; CHECK-NEXT: [[Z1:%.*]] = zext nneg <4 x i16> [[A:%.*]] to <4 x i32> -; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], -; CHECK-NEXT: ret <4 x i32> [[OR]] +; CHECK-NEXT: [[A:%.*]] = or <4 x i16> [[A1:%.*]], +; CHECK-NEXT: [[Z1:%.*]] = zext nneg <4 x i16> [[A]] to <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[Z1]] ; %z1 = zext nneg <4 x i16> %a to <4 x i32> %or = or <4 x i32> %z1, @@ -401,8 +401,8 @@ define <4 x i32> @or_zext_nneg_constant(<4 x i16> %a) { define <4 x i32> @or_zext_nneg_minus_constant(<4 x i8> %a) { ; CHECK-LABEL: @or_zext_nneg_minus_constant( -; CHECK-NEXT: [[Z1:%.*]] = zext nneg <4 x i8> [[A:%.*]] to <4 x i32> -; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], +; CHECK-NEXT: [[OR_INNER:%.*]] = or <4 x i8> [[A:%.*]], +; CHECK-NEXT: [[OR:%.*]] = zext <4 x i8> [[OR_INNER]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[OR]] ; %z1 = zext nneg <4 x i8> %a to <4 x i32> @@ -412,8 +412,8 @@ define <4 x i32> @or_zext_nneg_minus_constant(<4 x i8> %a) { define <4 x i32> @or_zext_nneg_multiconstant(<4 x i8> %a) { ; CHECK-LABEL: @or_zext_nneg_multiconstant( -; CHECK-NEXT: [[Z1:%.*]] = zext nneg <4 x i8> [[A:%.*]] to <4 x i32> -; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], +; CHECK-NEXT: [[OR_INNER:%.*]] = or <4 x i8> [[A:%.*]], +; CHECK-NEXT: [[OR:%.*]] = zext <4 x i8> [[OR_INNER]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[OR]] ; %z1 = zext nneg <4 x i8> %a to <4 x i32> From 2a00bb5ad497915fcc00181059a5cff467449ebc Mon Sep 17 00:00:00 2001 From: XChy Date: Tue, 2 Sep 2025 01:28:57 +0800 Subject: [PATCH 3/3] split the patch --- .../Transforms/Vectorize/VectorCombine.cpp | 15 +----- .../VectorCombine/AArch64/shrink-types.ll | 4 +- .../VectorCombine/X86/bitop-of-castops.ll | 52 +++++++++---------- 3 files changed, 29 insertions(+), 42 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 57f4170b06a53..7d28e6ab2f2bc 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -987,7 +987,7 @@ static Constant *getLosslessInvCast(Constant *C, Type *InvCastTo, // bitop(castop(x), C) -> // bitop(castop(x), castop(InvC)) -> // castop(bitop(x, InvC)) -// Supports: bitcast, trunc, sext, zext +// Supports: bitcast bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) { Instruction *LHS; Constant *C; @@ -1006,9 +1006,6 @@ bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) { // Only handle supported cast operations switch (CastOpcode) { case Instruction::BitCast: - case Instruction::Trunc: - case Instruction::SExt: - case Instruction::ZExt: break; default: return false; @@ -1074,16 +1071,6 @@ bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) { // Create the cast operation directly to ensure we get a new instruction Instruction *NewCast = CastInst::Create(CastOpcode, NewOp, I.getType()); - // Preserve cast instruction flags - if (RHSFlags.NNeg) - NewCast->setNonNeg(); - if (RHSFlags.NSW) - NewCast->setHasNoSignedWrap(); - if (RHSFlags.NUW) - NewCast->setHasNoUnsignedWrap(); - - NewCast->andIRFlags(LHSCast); - // Insert the new instruction Value *Result = Builder.Insert(NewCast); diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll b/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll index 6c0ab8b9abaff..761ad80d560e8 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll @@ -45,11 +45,11 @@ define i32 @multiuse(<16 x i32> %u, <16 x i32> %v, ptr %b) { ; CHECK-NEXT: [[U_MASKED:%.*]] = and <16 x i32> [[U:%.*]], splat (i32 255) ; CHECK-NEXT: [[V_MASKED:%.*]] = and <16 x i32> [[V:%.*]], splat (i32 255) ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[B:%.*]], align 1 +; CHECK-NEXT: [[TMP0:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = lshr <16 x i8> [[WIDE_LOAD]], splat (i8 4) ; CHECK-NEXT: [[TMP7:%.*]] = zext <16 x i8> [[TMP6]] to <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP7]], [[V_MASKED]] -; CHECK-NEXT: [[DOTINNER:%.*]] = and <16 x i8> [[WIDE_LOAD]], splat (i8 15) -; CHECK-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[DOTINNER]] to <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = and <16 x i32> [[TMP0]], splat (i32 15) ; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], [[U_MASKED]] ; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <16 x i32> [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP8]]) diff --git a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll index cd77818a2f9b6..ca707ca08f169 100644 --- a/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll +++ b/llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll @@ -287,9 +287,9 @@ define <2 x i32> @or_bitcast_v4i16_to_v2i32_constant_commuted(<4 x i16> %a) { ; Test bitwise operations with truncate and one constant define <4 x i16> @or_trunc_v4i32_to_v4i16_constant(<4 x i32> %a) { ; CHECK-LABEL: @or_trunc_v4i32_to_v4i16_constant( -; CHECK-NEXT: [[A:%.*]] = or <4 x i32> [[A1:%.*]], -; CHECK-NEXT: [[T1:%.*]] = trunc <4 x i32> [[A]] to <4 x i16> -; CHECK-NEXT: ret <4 x i16> [[T1]] +; CHECK-NEXT: [[T1:%.*]] = trunc <4 x i32> [[A:%.*]] to <4 x i16> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[T1]], +; CHECK-NEXT: ret <4 x i16> [[OR]] ; %t1 = trunc <4 x i32> %a to <4 x i16> %or = or <4 x i16> %t1, @@ -299,9 +299,9 @@ define <4 x i16> @or_trunc_v4i32_to_v4i16_constant(<4 x i32> %a) { ; Test bitwise operations with zero extend and one constant define <4 x i32> @or_zext_v4i16_to_v4i32_constant(<4 x i16> %a) { ; CHECK-LABEL: @or_zext_v4i16_to_v4i32_constant( -; CHECK-NEXT: [[A:%.*]] = or <4 x i16> [[A1:%.*]], -; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[A]] to <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[Z1]] +; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], +; CHECK-NEXT: ret <4 x i32> [[OR]] ; %z1 = zext <4 x i16> %a to <4 x i32> %or = or <4 x i32> %z1, @@ -322,9 +322,9 @@ define <4 x i32> @or_zext_v4i8_to_v4i32_constant_with_loss(<4 x i8> %a) { ; Test bitwise operations with sign extend and one constant define <4 x i32> @or_sext_v4i8_to_v4i32_positive_constant(<4 x i8> %a) { ; CHECK-LABEL: @or_sext_v4i8_to_v4i32_positive_constant( -; CHECK-NEXT: [[A:%.*]] = or <4 x i8> [[A1:%.*]], -; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A]] to <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[S1]] +; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[S1]], +; CHECK-NEXT: ret <4 x i32> [[OR]] ; %s1 = sext <4 x i8> %a to <4 x i32> %or = or <4 x i32> %s1, @@ -333,9 +333,9 @@ define <4 x i32> @or_sext_v4i8_to_v4i32_positive_constant(<4 x i8> %a) { define <4 x i32> @or_sext_v4i8_to_v4i32_minus_constant(<4 x i8> %a) { ; CHECK-LABEL: @or_sext_v4i8_to_v4i32_minus_constant( -; CHECK-NEXT: [[A:%.*]] = or <4 x i8> [[A1:%.*]], -; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A]] to <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[S1]] +; CHECK-NEXT: [[S1:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[S1]], +; CHECK-NEXT: ret <4 x i32> [[OR]] ; %s1 = sext <4 x i8> %a to <4 x i32> %or = or <4 x i32> %s1, @@ -356,9 +356,9 @@ define <4 x i32> @or_sext_v4i8_to_v4i32_constant_with_loss(<4 x i8> %a) { ; Test truncate with flag preservation and one constant define <4 x i16> @and_trunc_nuw_nsw_constant(<4 x i32> %a) { ; CHECK-LABEL: @and_trunc_nuw_nsw_constant( -; CHECK-NEXT: [[A:%.*]] = and <4 x i32> [[A1:%.*]], -; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A]] to <4 x i16> -; CHECK-NEXT: ret <4 x i16> [[T1]] +; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i16> +; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[T1]], +; CHECK-NEXT: ret <4 x i16> [[AND]] ; %t1 = trunc nuw nsw <4 x i32> %a to <4 x i16> %and = and <4 x i16> %t1, @@ -367,8 +367,8 @@ define <4 x i16> @and_trunc_nuw_nsw_constant(<4 x i32> %a) { define <4 x i8> @and_trunc_nuw_nsw_minus_constant(<4 x i32> %a) { ; CHECK-LABEL: @and_trunc_nuw_nsw_minus_constant( -; CHECK-NEXT: [[AND_INNER:%.*]] = and <4 x i32> [[A:%.*]], -; CHECK-NEXT: [[AND:%.*]] = trunc nuw <4 x i32> [[AND_INNER]] to <4 x i8> +; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i8> +; CHECK-NEXT: [[AND:%.*]] = and <4 x i8> [[T1]], ; CHECK-NEXT: ret <4 x i8> [[AND]] ; %t1 = trunc nuw nsw <4 x i32> %a to <4 x i8> @@ -378,8 +378,8 @@ define <4 x i8> @and_trunc_nuw_nsw_minus_constant(<4 x i32> %a) { define <4 x i8> @and_trunc_nuw_nsw_multiconstant(<4 x i32> %a) { ; CHECK-LABEL: @and_trunc_nuw_nsw_multiconstant( -; CHECK-NEXT: [[AND_INNER:%.*]] = and <4 x i32> [[A:%.*]], -; CHECK-NEXT: [[AND:%.*]] = trunc nuw <4 x i32> [[AND_INNER]] to <4 x i8> +; CHECK-NEXT: [[T1:%.*]] = trunc nuw nsw <4 x i32> [[A:%.*]] to <4 x i8> +; CHECK-NEXT: [[AND:%.*]] = and <4 x i8> [[T1]], ; CHECK-NEXT: ret <4 x i8> [[AND]] ; %t1 = trunc nuw nsw <4 x i32> %a to <4 x i8> @@ -390,9 +390,9 @@ define <4 x i8> @and_trunc_nuw_nsw_multiconstant(<4 x i32> %a) { ; Test sign extend with nneg flag and one constant define <4 x i32> @or_zext_nneg_constant(<4 x i16> %a) { ; CHECK-LABEL: @or_zext_nneg_constant( -; CHECK-NEXT: [[A:%.*]] = or <4 x i16> [[A1:%.*]], -; CHECK-NEXT: [[Z1:%.*]] = zext nneg <4 x i16> [[A]] to <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[Z1]] +; CHECK-NEXT: [[Z1:%.*]] = zext nneg <4 x i16> [[A:%.*]] to <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], +; CHECK-NEXT: ret <4 x i32> [[OR]] ; %z1 = zext nneg <4 x i16> %a to <4 x i32> %or = or <4 x i32> %z1, @@ -401,8 +401,8 @@ define <4 x i32> @or_zext_nneg_constant(<4 x i16> %a) { define <4 x i32> @or_zext_nneg_minus_constant(<4 x i8> %a) { ; CHECK-LABEL: @or_zext_nneg_minus_constant( -; CHECK-NEXT: [[OR_INNER:%.*]] = or <4 x i8> [[A:%.*]], -; CHECK-NEXT: [[OR:%.*]] = zext <4 x i8> [[OR_INNER]] to <4 x i32> +; CHECK-NEXT: [[Z1:%.*]] = zext nneg <4 x i8> [[A:%.*]] to <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], ; CHECK-NEXT: ret <4 x i32> [[OR]] ; %z1 = zext nneg <4 x i8> %a to <4 x i32> @@ -412,8 +412,8 @@ define <4 x i32> @or_zext_nneg_minus_constant(<4 x i8> %a) { define <4 x i32> @or_zext_nneg_multiconstant(<4 x i8> %a) { ; CHECK-LABEL: @or_zext_nneg_multiconstant( -; CHECK-NEXT: [[OR_INNER:%.*]] = or <4 x i8> [[A:%.*]], -; CHECK-NEXT: [[OR:%.*]] = zext <4 x i8> [[OR_INNER]] to <4 x i32> +; CHECK-NEXT: [[Z1:%.*]] = zext nneg <4 x i8> [[A:%.*]] to <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[Z1]], ; CHECK-NEXT: ret <4 x i32> [[OR]] ; %z1 = zext nneg <4 x i8> %a to <4 x i32>