diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 45a37622a531b..a1646cf842afd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1679,81 +1679,81 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, Elt = ConstantInt::get(*getContext(), Elt->getValue()); // In some cases the vector type is legal but the element type is illegal and - // needs to be promoted, for example v8i8 on ARM. In this case, promote the - // inserted value (the type does not need to match the vector element type). - // Any extra bits introduced will be truncated away. - if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) == - TargetLowering::TypePromoteInteger) { - EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); - APInt NewVal; - if (TLI->isSExtCheaperThanZExt(VT.getScalarType(), EltVT)) - NewVal = Elt->getValue().sextOrTrunc(EltVT.getSizeInBits()); - else - NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits()); - Elt = ConstantInt::get(*getContext(), NewVal); - } - // In other cases the element type is illegal and needs to be expanded, for - // example v2i64 on MIPS32. In this case, find the nearest legal type, split - // the value into n parts and use a vector type with n-times the elements. - // Then bitcast to the type requested. - // Legalizing constants too early makes the DAGCombiner's job harder so we - // only legalize if the DAG tells us we must produce legal types. - else if (NewNodesMustHaveLegalTypes && VT.isVector() && - TLI->getTypeAction(*getContext(), EltVT) == - TargetLowering::TypeExpandInteger) { - const APInt &NewVal = Elt->getValue(); - EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); - unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits(); - - // For scalable vectors, try to use a SPLAT_VECTOR_PARTS node. - if (VT.isScalableVector() || - TLI->isOperationLegal(ISD::SPLAT_VECTOR, VT)) { - assert(EltVT.getSizeInBits() % ViaEltSizeInBits == 0 && - "Can only handle an even split!"); - unsigned Parts = EltVT.getSizeInBits() / ViaEltSizeInBits; - - SmallVector ScalarParts; - for (unsigned i = 0; i != Parts; ++i) - ScalarParts.push_back(getConstant( - NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL, - ViaEltVT, isT, isO)); - - return getNode(ISD::SPLAT_VECTOR_PARTS, DL, VT, ScalarParts); - } + // thus when necessary we "legalise" the constant here so as to simplify the + // job of calling this function. NOTE: Only legalize when necessary so that + // we don't make DAGCombiner's job harder. + if (NewNodesMustHaveLegalTypes && VT.isVector()) { + // Promote the inserted value (the type does not need to match the vector + // element type). Any extra bits introduced will be truncated away. + if (TLI->getTypeAction(*getContext(), EltVT) == + TargetLowering::TypePromoteInteger) { + EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); + APInt NewVal; + if (TLI->isSExtCheaperThanZExt(VT.getScalarType(), EltVT)) + NewVal = Elt->getValue().sextOrTrunc(EltVT.getSizeInBits()); + else + NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits()); + Elt = ConstantInt::get(*getContext(), NewVal); + } + // For expansion we find the nearest legal type, split the value into n + // parts and use a vector type with n-times the elements. Then bitcast to + // the type requested. 
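+ // For example, for v2i64 on MIPS32 (where i64 is expanded via i32) the
+ // 64-bit splat value is split into two i32 parts and rebuilt as a v4i32
+ // BUILD_VECTOR that is then bitcast back to v2i64.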
+ else if (TLI->getTypeAction(*getContext(), EltVT) == + TargetLowering::TypeExpandInteger) { + const APInt &NewVal = Elt->getValue(); + EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); + unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits(); + + // For scalable vectors, try to use a SPLAT_VECTOR_PARTS node. + if (VT.isScalableVector() || + TLI->isOperationLegal(ISD::SPLAT_VECTOR, VT)) { + assert(EltVT.getSizeInBits() % ViaEltSizeInBits == 0 && + "Can only handle an even split!"); + unsigned Parts = EltVT.getSizeInBits() / ViaEltSizeInBits; + + SmallVector ScalarParts; + for (unsigned i = 0; i != Parts; ++i) + ScalarParts.push_back(getConstant( + NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL, + ViaEltVT, isT, isO)); + + return getNode(ISD::SPLAT_VECTOR_PARTS, DL, VT, ScalarParts); + } - unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits; - EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts); + unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits; + EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts); - // Check the temporary vector is the correct size. If this fails then - // getTypeToTransformTo() probably returned a type whose size (in bits) - // isn't a power-of-2 factor of the requested type size. - assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits()); + // Check the temporary vector is the correct size. If this fails then + // getTypeToTransformTo() probably returned a type whose size (in bits) + // isn't a power-of-2 factor of the requested type size. + assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits()); - SmallVector EltParts; - for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) - EltParts.push_back(getConstant( - NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL, - ViaEltVT, isT, isO)); + SmallVector EltParts; + for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) + EltParts.push_back(getConstant( + NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL, + ViaEltVT, isT, isO)); - // EltParts is currently in little endian order. If we actually want - // big-endian order then reverse it now. - if (getDataLayout().isBigEndian()) - std::reverse(EltParts.begin(), EltParts.end()); + // EltParts is currently in little endian order. If we actually want + // big-endian order then reverse it now. + if (getDataLayout().isBigEndian()) + std::reverse(EltParts.begin(), EltParts.end()); - // The elements must be reversed when the element order is different - // to the endianness of the elements (because the BITCAST is itself a - // vector shuffle in this situation). However, we do not need any code to - // perform this reversal because getConstant() is producing a vector - // splat. - // This situation occurs in MIPS MSA. + // The elements must be reversed when the element order is different + // to the endianness of the elements (because the BITCAST is itself a + // vector shuffle in this situation). However, we do not need any code to + // perform this reversal because getConstant() is producing a vector + // splat. + // This situation occurs in MIPS MSA. 
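+ // Build the replacement vector by repeating the expanded parts once per
+ // original element, then bitcast the resulting BUILD_VECTOR back to the
+ // requested type.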
- SmallVector Ops; - for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) - llvm::append_range(Ops, EltParts); + SmallVector Ops; + for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) + llvm::append_range(Ops, EltParts); - SDValue V = - getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops)); - return V; + SDValue V = + getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops)); + return V; + } } assert(Elt->getBitWidth() == EltVT.getSizeInBits() && diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 4f548cbad5c30..c63eb7fc6b374 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1791,26 +1791,8 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (const Constant *C = dyn_cast(V)) { EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true); - if (const ConstantInt *CI = dyn_cast(C)) { - SDLoc DL = getCurSDLoc(); - - // DAG.getConstant() may attempt to legalise the vector constant which can - // significantly change the combines applied to the DAG. To reduce the - // divergence when enabling ConstantInt based vectors we try to construct - // the DAG in the same way as shufflevector based splats. TODO: The - // divergence sometimes leads to better optimisations. Ideally we should - // prevent DAG.getConstant() from legalising too early but there are some - // degradations preventing this. - if (VT.isScalableVector()) - return DAG.getNode( - ISD::SPLAT_VECTOR, DL, VT, - DAG.getConstant(CI->getValue(), DL, VT.getVectorElementType())); - if (VT.isFixedLengthVector()) - return DAG.getSplatBuildVector( - VT, DL, - DAG.getConstant(CI->getValue(), DL, VT.getVectorElementType())); - return DAG.getConstant(*CI, DL, VT); - } + if (const ConstantInt *CI = dyn_cast(C)) + return DAG.getConstant(*CI, getCurSDLoc(), VT); if (const GlobalValue *GV = dyn_cast(C)) return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2eadcc5416c28..8276678b40711 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -45540,6 +45540,10 @@ static SDValue combineCastedMaskArithmetic(SDNode *N, SelectionDAG &DAG, if (!sd_match(Op, m_OneUse(m_BitwiseLogic(m_Value(LHS), m_Value(RHS))))) return SDValue(); + // WIP: Fixes one of the failures but triggers more. + //if (isBitwiseNot(Op)) + // return SDValue(); + // If either operand was bitcast from DstVT, then perform logic with DstVT (at // least one of the getBitcast() will fold away). if (sd_match(LHS, m_OneUse(m_BitCast(m_SpecificVT(DstVT)))) || @@ -48138,8 +48142,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // Check if the first operand is all zeros and Cond type is vXi1. // If this an avx512 target we can improve the use of zero masking by // swapping the operands and inverting the condition. 
- if (N->getOpcode() == ISD::VSELECT && Cond.hasOneUse() && - Subtarget.hasAVX512() && CondVT.getVectorElementType() == MVT::i1 && + if (!DCI.isBeforeLegalize() && N->getOpcode() == ISD::VSELECT && + Cond.hasOneUse() && Subtarget.hasAVX512() && + CondVT.getVectorElementType() == MVT::i1 && ISD::isBuildVectorAllZeros(LHS.getNode()) && !ISD::isBuildVectorAllZeros(RHS.getNode())) { // Invert the cond to not(cond) : xor(op,allones)=not(op) diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll index bdbebd8726fde..1be02ae602a3c 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll @@ -8,9 +8,9 @@ define <16 x i8> @div16xi8(<16 x i8> %x) { ; CHECK-SD-NEXT: movi v1.16b, #41 ; CHECK-SD-NEXT: smull2 v2.8h, v0.16b, v1.16b ; CHECK-SD-NEXT: smull v0.8h, v0.8b, v1.8b -; CHECK-SD-NEXT: uzp2 v0.16b, v0.16b, v2.16b -; CHECK-SD-NEXT: sshr v0.16b, v0.16b, #2 -; CHECK-SD-NEXT: usra v0.16b, v0.16b, #7 +; CHECK-SD-NEXT: uzp2 v1.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: sshr v0.16b, v1.16b, #2 +; CHECK-SD-NEXT: usra v0.16b, v1.16b, #7 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: div16xi8: @@ -78,9 +78,9 @@ define <8 x i16> @div8xi16(<8 x i16> %x) { ; CHECK-SD-NEXT: smull2 v2.4s, v0.8h, v1.8h ; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-SD-NEXT: uzp2 v1.8h, v1.8h, v2.8h -; CHECK-SD-NEXT: add v0.8h, v1.8h, v0.8h -; CHECK-SD-NEXT: sshr v0.8h, v0.8h, #12 -; CHECK-SD-NEXT: usra v0.8h, v0.8h, #15 +; CHECK-SD-NEXT: add v1.8h, v1.8h, v0.8h +; CHECK-SD-NEXT: sshr v0.8h, v1.8h, #12 +; CHECK-SD-NEXT: usra v0.8h, v1.8h, #15 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: div8xi16: diff --git a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll index b165ac0d56d20..6c8ebc65a327c 100644 --- a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll @@ -14,10 +14,10 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { ; CHECK-NEXT: mla v1.4h, v0.4h, v2.4h ; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_2] ; CHECK-NEXT: adrp x8, .LCPI0_3 -; CHECK-NEXT: sshl v1.4h, v1.4h, v2.4h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_3] -; CHECK-NEXT: usra v1.4h, v1.4h, #15 -; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h +; CHECK-NEXT: sshl v2.4h, v1.4h, v2.4h +; CHECK-NEXT: usra v2.4h, v1.4h, #15 +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_3] +; CHECK-NEXT: mls v0.4h, v2.4h, v1.4h ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -27,14 +27,14 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { ; CHECK-LABEL: fold_srem_vec_2: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #44151 // =0xac77 -; CHECK-NEXT: movi v2.4h, #95 +; CHECK-NEXT: movi v3.4h, #95 ; CHECK-NEXT: dup v1.4h, w8 ; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-NEXT: add v1.4h, v1.4h, v0.4h -; CHECK-NEXT: sshr v1.4h, v1.4h, #6 -; CHECK-NEXT: usra v1.4h, v1.4h, #15 -; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h +; CHECK-NEXT: sshr v2.4h, v1.4h, #6 +; CHECK-NEXT: usra v2.4h, v1.4h, #15 +; CHECK-NEXT: mls v0.4h, v2.4h, v3.4h ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -46,15 +46,15 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; CHECK-LABEL: combine_srem_sdiv: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #44151 // =0xac77 -; CHECK-NEXT: movi v2.4h, #95 +; CHECK-NEXT: movi v3.4h, #95 ; CHECK-NEXT: dup v1.4h, w8 ; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-NEXT: add v1.4h, v1.4h, v0.4h -; CHECK-NEXT: sshr 
v1.4h, v1.4h, #6 -; CHECK-NEXT: usra v1.4h, v1.4h, #15 -; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h -; CHECK-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-NEXT: sshr v2.4h, v1.4h, #6 +; CHECK-NEXT: usra v2.4h, v1.4h, #15 +; CHECK-NEXT: mls v0.4h, v2.4h, v3.4h +; CHECK-NEXT: add v0.4h, v0.4h, v2.4h ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, %2 = sdiv <4 x i16> %x, @@ -74,10 +74,10 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) { ; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-NEXT: add v1.4h, v1.4h, v0.4h -; CHECK-NEXT: sshl v1.4h, v1.4h, v2.4h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI3_2] -; CHECK-NEXT: usra v1.4h, v1.4h, #15 -; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h +; CHECK-NEXT: sshl v2.4h, v1.4h, v2.4h +; CHECK-NEXT: usra v2.4h, v1.4h, #15 +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI3_2] +; CHECK-NEXT: mls v0.4h, v2.4h, v1.4h ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -91,14 +91,14 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { ; CHECK-NEXT: movi d2, #0x00ffff0000ffff ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: adrp x8, .LCPI4_1 +; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_1] +; CHECK-NEXT: adrp x8, .LCPI4_2 ; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-NEXT: and v2.8b, v0.8b, v2.8b ; CHECK-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-NEXT: add v1.4h, v1.4h, v2.4h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_1] -; CHECK-NEXT: adrp x8, .LCPI4_2 -; CHECK-NEXT: sshl v1.4h, v1.4h, v2.4h ; CHECK-NEXT: ushr v2.4h, v1.4h, #15 +; CHECK-NEXT: sshl v1.4h, v1.4h, v3.4h ; CHECK-NEXT: mov v2.h[0], wzr ; CHECK-NEXT: add v1.4h, v1.4h, v2.4h ; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_2] @@ -118,12 +118,12 @@ define <4 x i16> @dont_fold_srem_i16_smax(<4 x i16> %x) { ; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_0] ; CHECK-NEXT: adrp x8, .LCPI5_2 ; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h +; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI5_2] +; CHECK-NEXT: adrp x8, .LCPI5_3 ; CHECK-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-NEXT: mla v1.4h, v0.4h, v2.4h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_2] -; CHECK-NEXT: adrp x8, .LCPI5_3 -; CHECK-NEXT: sshl v1.4h, v1.4h, v2.4h ; CHECK-NEXT: ushr v2.4h, v1.4h, #15 +; CHECK-NEXT: sshl v1.4h, v1.4h, v3.4h ; CHECK-NEXT: mov v2.h[0], wzr ; CHECK-NEXT: add v1.4h, v1.4h, v2.4h ; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_3] @@ -181,13 +181,13 @@ define <16 x i8> @fold_srem_v16i8(<16 x i8> %x) { ; CHECK-LABEL: fold_srem_v16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.16b, #103 +; CHECK-NEXT: movi v3.16b, #10 ; CHECK-NEXT: smull2 v2.8h, v0.16b, v1.16b ; CHECK-NEXT: smull v1.8h, v0.8b, v1.8b ; CHECK-NEXT: uzp2 v1.16b, v1.16b, v2.16b -; CHECK-NEXT: movi v2.16b, #10 -; CHECK-NEXT: sshr v1.16b, v1.16b, #2 -; CHECK-NEXT: usra v1.16b, v1.16b, #7 -; CHECK-NEXT: mls v0.16b, v1.16b, v2.16b +; CHECK-NEXT: sshr v2.16b, v1.16b, #2 +; CHECK-NEXT: usra v2.16b, v1.16b, #7 +; CHECK-NEXT: mls v0.16b, v2.16b, v3.16b ; CHECK-NEXT: ret %1 = srem <16 x i8> %x, ret <16 x i8> %1 @@ -199,8 +199,8 @@ define <8 x i8> @fold_srem_v8i8(<8 x i8> %x) { ; CHECK-NEXT: movi v1.8b, #103 ; CHECK-NEXT: movi v2.8b, #10 ; CHECK-NEXT: smull v1.8h, v0.8b, v1.8b -; CHECK-NEXT: shrn v1.8b, v1.8h, #8 -; CHECK-NEXT: sshr v1.8b, v1.8b, #2 +; CHECK-NEXT: sshr v1.8h, v1.8h, #10 +; CHECK-NEXT: xtn v1.8b, v1.8h ; CHECK-NEXT: usra v1.8b, v1.8b, #7 ; CHECK-NEXT: mls v0.8b, v1.8b, v2.8b ; CHECK-NEXT: ret @@ -212,14 +212,14 @@ define <8 x i16> @fold_srem_v8i16(<8 x i16> %x) { ; CHECK-LABEL: fold_srem_v8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #26215 // =0x6667 +; CHECK-NEXT: movi v3.8h, #10 ; 
CHECK-NEXT: dup v1.8h, w8 ; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h ; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h -; CHECK-NEXT: movi v2.8h, #10 -; CHECK-NEXT: sshr v1.8h, v1.8h, #2 -; CHECK-NEXT: usra v1.8h, v1.8h, #15 -; CHECK-NEXT: mls v0.8h, v1.8h, v2.8h +; CHECK-NEXT: sshr v2.8h, v1.8h, #2 +; CHECK-NEXT: usra v2.8h, v1.8h, #15 +; CHECK-NEXT: mls v0.8h, v2.8h, v3.8h ; CHECK-NEXT: ret %1 = srem <8 x i16> %x, ret <8 x i16> %1 diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll index 3af858713525b..7e95f61604620 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -356,9 +356,7 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind { ; CHECK-LABEL: v16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.16b, #1 -; CHECK-NEXT: eor v1.16b, v1.16b, v2.16b -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: bic v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %z = call <16 x i1> @llvm.ssub.sat.v16i1(<16 x i1> %x, <16 x i1> %y) ret <16 x i1> %z diff --git a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll index 8a504cd739211..944071b9d2161 100644 --- a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll +++ b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll @@ -176,10 +176,11 @@ define <1 x i32> @test_compress_v1i32_with_sve(<1 x i32> %vec, <1 x i1> %mask) { ; CHECK-LABEL: test_compress_v1i32_with_sve: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: sbfx w8, w0, #0, #1 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-NEXT: mov v1.s[0], w8 +; CHECK-NEXT: mov v1.s[0], w0 +; CHECK-NEXT: shl v1.2s, v1.2s, #31 +; CHECK-NEXT: cmlt v1.2s, v1.2s, #0 ; CHECK-NEXT: ushll v1.2d, v1.2s, #0 ; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0 diff --git a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll index 468a33ce5bfcf..bd7952a7992c6 100644 --- a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll @@ -186,8 +186,8 @@ define <8 x i8> @fold_urem_v8i8(<8 x i8> %x) { ; CHECK-NEXT: movi v1.8b, #205 ; CHECK-NEXT: movi v2.8b, #10 ; CHECK-NEXT: umull v1.8h, v0.8b, v1.8b -; CHECK-NEXT: shrn v1.8b, v1.8h, #8 -; CHECK-NEXT: ushr v1.8b, v1.8b, #3 +; CHECK-NEXT: ushr v1.8h, v1.8h, #11 +; CHECK-NEXT: xtn v1.8b, v1.8h ; CHECK-NEXT: mls v0.8b, v1.8b, v2.8b ; CHECK-NEXT: ret %1 = urem <8 x i8> %x, diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll index a71cf95a728db..34d9294ac7f3c 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -345,9 +345,7 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind { ; CHECK-LABEL: v16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.16b, #1 -; CHECK-NEXT: eor v1.16b, v1.16b, v2.16b -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: bic v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %z = call <16 x i1> @llvm.usub.sat.v16i1(<16 x i1> %x, <16 x i1> %y) ret <16 x i1> %z diff --git a/llvm/test/CodeGen/ARM/bool-ext-inc.ll b/llvm/test/CodeGen/ARM/bool-ext-inc.ll index 00a7fcdee3caa..80e89139389b1 100644 --- a/llvm/test/CodeGen/ARM/bool-ext-inc.ll +++ b/llvm/test/CodeGen/ARM/bool-ext-inc.ll @@ -14,9 +14,8 @@ define i32 
@sext_inc(i1 zeroext %x) { define <4 x i32> @sext_inc_vec(<4 x i1> %x) { ; CHECK-LABEL: sext_inc_vec: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.i16 d16, #0x1 -; CHECK-NEXT: vmov d17, r0, r1 -; CHECK-NEXT: veor d16, d17, d16 +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vmvn d16, d16 ; CHECK-NEXT: vmov.i32 q9, #0x1 ; CHECK-NEXT: vmovl.u16 q8, d16 ; CHECK-NEXT: vand q8, q8, q9 diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll b/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll index 8f917becafec0..7484068e236ac 100644 --- a/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll +++ b/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll @@ -197,7 +197,7 @@ define @vselect_add_const_signbit_nxv2i16( define <2 x i16> @vselect_xor_const_signbit_v2i16(<2 x i16> %a0) { ; CHECK-LABEL: vselect_xor_const_signbit_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: lui a0, 1048568 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret @@ -210,7 +210,7 @@ define <2 x i16> @vselect_xor_const_signbit_v2i16(<2 x i16> %a0) { define @vselect_xor_const_signbit_nxv2i16( %a0) { ; CHECK-LABEL: vselect_xor_const_signbit_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: lui a0, 1048568 ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll index dba5d26c216fa..7a347ec4aacd1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -1345,18 +1345,9 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) { ; RV64NOM-NEXT: addi a0, a0, %lo(.LCPI46_0) ; RV64NOM-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64NOM-NEXT: vle32.v v9, (a0) -; RV64NOM-NEXT: lui a0, 1044480 -; RV64NOM-NEXT: vmv.s.x v10, a0 -; RV64NOM-NEXT: lui a0, 12320 -; RV64NOM-NEXT: addi a0, a0, 257 -; RV64NOM-NEXT: vsext.vf4 v11, v10 -; RV64NOM-NEXT: vand.vv v10, v8, v11 ; RV64NOM-NEXT: vmulh.vv v8, v8, v9 -; RV64NOM-NEXT: vmv.s.x v9, a0 -; RV64NOM-NEXT: vadd.vv v8, v8, v10 -; RV64NOM-NEXT: vsext.vf4 v10, v9 -; RV64NOM-NEXT: vsra.vv v8, v8, v10 ; RV64NOM-NEXT: vsrl.vi v9, v8, 31 +; RV64NOM-NEXT: vsra.vi v8, v8, 2 ; RV64NOM-NEXT: vadd.vv v8, v8, v9 ; RV64NOM-NEXT: vslidedown.vi v8, v8, 2 ; RV64NOM-NEXT: vmv.x.s a0, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 0c30cbe4a42ef..fba07787696a3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1269,8 +1269,8 @@ define void @mulhs_v8i16(ptr %x) { ; CHECK-NEXT: addi a1, a1, 1755 ; CHECK-NEXT: vmerge.vxm v9, v9, a1, v0 ; CHECK-NEXT: vmulh.vv v8, v8, v9 -; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vsrl.vi v9, v8, 15 +; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret @@ -1327,8 +1327,8 @@ define void @mulhs_v4i32(ptr %x) { ; RV64-NEXT: vmv.v.x v9, a1 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vmulh.vv v8, v8, v9 -; RV64-NEXT: vsra.vi v8, v8, 1 ; RV64-NEXT: vsrl.vi v9, v8, 31 +; RV64-NEXT: vsra.vi v8, v8, 1 ; RV64-NEXT: vadd.vv v8, v8, v9 ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret @@ -3410,8 +3410,8 @@ define void @mulhs_v16i16(ptr %x) { ; CHECK-NEXT: addi a1, a1, 1755 ; CHECK-NEXT: vmerge.vxm v10, v10, a1, v0 ; CHECK-NEXT: vmulh.vv v8, v8, v10 -; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vsrl.vi v10, 
v8, 15 +; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vadd.vv v8, v8, v10 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret @@ -3451,8 +3451,8 @@ define void @mulhs_v8i32(ptr %x) { ; RV64-NEXT: vmv.v.x v10, a1 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-NEXT: vmulh.vv v8, v8, v10 -; RV64-NEXT: vsra.vi v8, v8, 1 ; RV64-NEXT: vsrl.vi v10, v8, 31 +; RV64-NEXT: vsra.vi v8, v8, 1 ; RV64-NEXT: vadd.vv v8, v8, v10 ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret @@ -5564,8 +5564,8 @@ define void @mulhs_vx_v8i16(ptr %x) { ; CHECK-NEXT: lui a1, 5 ; CHECK-NEXT: addi a1, a1, -1755 ; CHECK-NEXT: vmulh.vx v8, v8, a1 -; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vsrl.vi v9, v8, 15 +; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret @@ -5576,31 +5576,18 @@ define void @mulhs_vx_v8i16(ptr %x) { } define void @mulhs_vx_v4i32(ptr %x) { -; RV32-LABEL: mulhs_vx_v4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: lui a1, 629146 -; RV32-NEXT: addi a1, a1, -1639 -; RV32-NEXT: vmulh.vx v8, v8, a1 -; RV32-NEXT: vsrl.vi v9, v8, 31 -; RV32-NEXT: vsra.vi v8, v8, 1 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: mulhs_vx_v4i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: lui a1, 629146 -; RV64-NEXT: addi a1, a1, -1639 -; RV64-NEXT: vmulh.vx v8, v8, a1 -; RV64-NEXT: vsra.vi v8, v8, 1 -; RV64-NEXT: vsrl.vi v9, v8, 31 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: mulhs_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: lui a1, 629146 +; CHECK-NEXT: addi a1, a1, -1639 +; CHECK-NEXT: vmulh.vx v8, v8, a1 +; CHECK-NEXT: vsrl.vi v9, v8, 31 +; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %a = load <4 x i32>, ptr %x %b = sdiv <4 x i32> %a, store <4 x i32> %b, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll index 22956f8fe3551..9d3fe3a90b463 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll @@ -47,9 +47,9 @@ define <4 x i32> @select_addsub_v4i32(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) define <4 x i32> @select_addsub_v4i32_select_swapped(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: select_addsub_v4i32_select_swapped: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vmnot.m v0, v0 -; CHECK-NEXT: vrsub.vi v9, v9, 0, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vrsub.vi v10, v9, 0 +; CHECK-NEXT: vmerge.vvm v9, v10, v9, v0 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: ret %sub = sub <4 x i32> %a, %b @@ -74,9 +74,9 @@ define <4 x i32> @select_addsub_v4i32_add_swapped(<4 x i1> %cc, <4 x i32> %a, <4 define <4 x i32> @select_addsub_v4i32_both_swapped(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: select_addsub_v4i32_both_swapped: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vmnot.m v0, v0 -; CHECK-NEXT: vrsub.vi v9, v9, 0, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vrsub.vi v10, v9, 0 +; CHECK-NEXT: vmerge.vvm v9, v10, v9, v0 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: 
ret %sub = sub <4 x i32> %a, %b diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll index 7afd31fdd663c..8e85b1486bacb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll @@ -18,7 +18,7 @@ define <8 x i7> @vsadd_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: vmin.vx v8, v8, a1, v0.t -; CHECK-NEXT: li a0, 192 +; CHECK-NEXT: li a0, -64 ; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.sadd.sat.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll index 6ddf2e464750e..8430a2ebb7896 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll @@ -18,7 +18,7 @@ define <8 x i7> @vssub_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t ; CHECK-NEXT: vmin.vx v8, v8, a1, v0.t -; CHECK-NEXT: li a0, 192 +; CHECK-NEXT: li a0, -64 ; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.ssub.sat.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll index a189711d11471..d579fb82de536 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll @@ -595,7 +595,7 @@ define <1 x i32> @vqdotu_vx_partial_reduce(<4 x i8> %a, <4 x i8> %b) { ; DOT: # %bb.0: # %entry ; DOT-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; DOT-NEXT: vmv.s.x v9, zero -; DOT-NEXT: li a0, 128 +; DOT-NEXT: li a0, -128 ; DOT-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; DOT-NEXT: vmv.v.x v10, a0 ; DOT-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -631,7 +631,7 @@ define <1 x i32> @vqdot_vx_partial_reduce(<4 x i8> %a, <4 x i8> %b) { ; DOT: # %bb.0: # %entry ; DOT-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; DOT-NEXT: vmv.s.x v9, zero -; DOT-NEXT: li a0, 128 +; DOT-NEXT: li a0, -128 ; DOT-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; DOT-NEXT: vmv.v.x v10, a0 ; DOT-NEXT: vsetivli zero, 1, e32, mf2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll index de15e185998c4..489302c3ce722 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll @@ -33,8 +33,8 @@ define @vdiv_vi_nxv1i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmulh.vx v9, v8, a0 ; CHECK-NEXT: vsub.vv v8, v9, v8 -; CHECK-NEXT: vsra.vi v8, v8, 2 ; CHECK-NEXT: vsrl.vi v9, v8, 7 +; CHECK-NEXT: vsra.vi v8, v8, 2 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: ret %vc = sdiv %va, splat (i8 -7) @@ -90,8 +90,8 @@ define @vdiv_vi_nxv2i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmulh.vx v9, v8, a0 ; CHECK-NEXT: vsub.vv v8, v9, v8 -; CHECK-NEXT: vsra.vi v8, v8, 2 ; CHECK-NEXT: vsrl.vi v9, v8, 7 +; CHECK-NEXT: vsra.vi v8, v8, 2 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: ret %vc = sdiv %va, splat (i8 -7) @@ -127,8 +127,8 @@ define @vdiv_vi_nxv4i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmulh.vx v9, v8, a0 ; CHECK-NEXT: vsub.vv v8, v9, v8 
-; CHECK-NEXT: vsra.vi v8, v8, 2 ; CHECK-NEXT: vsrl.vi v9, v8, 7 +; CHECK-NEXT: vsra.vi v8, v8, 2 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: ret %vc = sdiv %va, splat (i8 -7) @@ -164,8 +164,8 @@ define @vdiv_vi_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vmulh.vx v9, v8, a0 ; CHECK-NEXT: vsub.vv v8, v9, v8 -; CHECK-NEXT: vsra.vi v8, v8, 2 ; CHECK-NEXT: vsrl.vi v9, v8, 7 +; CHECK-NEXT: vsra.vi v8, v8, 2 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: ret %vc = sdiv %va, splat (i8 -7) @@ -201,8 +201,8 @@ define @vdiv_vi_nxv16i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vmulh.vx v10, v8, a0 ; CHECK-NEXT: vsub.vv v8, v10, v8 -; CHECK-NEXT: vsra.vi v8, v8, 2 ; CHECK-NEXT: vsrl.vi v10, v8, 7 +; CHECK-NEXT: vsra.vi v8, v8, 2 ; CHECK-NEXT: vadd.vv v8, v8, v10 ; CHECK-NEXT: ret %vc = sdiv %va, splat (i8 -7) @@ -238,8 +238,8 @@ define @vdiv_vi_nxv32i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: vmulh.vx v12, v8, a0 ; CHECK-NEXT: vsub.vv v8, v12, v8 -; CHECK-NEXT: vsra.vi v8, v8, 2 ; CHECK-NEXT: vsrl.vi v12, v8, 7 +; CHECK-NEXT: vsra.vi v8, v8, 2 ; CHECK-NEXT: vadd.vv v8, v8, v12 ; CHECK-NEXT: ret %vc = sdiv %va, splat (i8 -7) @@ -275,8 +275,8 @@ define @vdiv_vi_nxv64i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vmulh.vx v16, v8, a0 ; CHECK-NEXT: vsub.vv v8, v16, v8 -; CHECK-NEXT: vsra.vi v8, v8, 2 ; CHECK-NEXT: vsrl.vi v16, v8, 7 +; CHECK-NEXT: vsra.vi v8, v8, 2 ; CHECK-NEXT: vadd.vv v8, v8, v16 ; CHECK-NEXT: ret %vc = sdiv %va, splat (i8 -7) @@ -312,8 +312,8 @@ define @vdiv_vi_nxv1i16_0( %va) { ; CHECK-NEXT: addi a0, a0, 1755 ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmulh.vx v8, v8, a0 -; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vsrl.vi v9, v8, 15 +; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: ret %vc = sdiv %va, splat (i16 -7) @@ -349,8 +349,8 @@ define @vdiv_vi_nxv2i16_0( %va) { ; CHECK-NEXT: addi a0, a0, 1755 ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmulh.vx v8, v8, a0 -; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vsrl.vi v9, v8, 15 +; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: ret %vc = sdiv %va, splat (i16 -7) @@ -386,8 +386,8 @@ define @vdiv_vi_nxv4i16_0( %va) { ; CHECK-NEXT: addi a0, a0, 1755 ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vmulh.vx v8, v8, a0 -; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vsrl.vi v9, v8, 15 +; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: ret %vc = sdiv %va, splat (i16 -7) @@ -423,8 +423,8 @@ define @vdiv_vi_nxv8i16_0( %va) { ; CHECK-NEXT: addi a0, a0, 1755 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vmulh.vx v8, v8, a0 -; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vsrl.vi v10, v8, 15 +; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vadd.vv v8, v8, v10 ; CHECK-NEXT: ret %vc = sdiv %va, splat (i16 -7) @@ -460,8 +460,8 @@ define @vdiv_vi_nxv16i16_0( %va) { ; CHECK-NEXT: addi a0, a0, 1755 ; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vmulh.vx v8, v8, a0 -; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vsrl.vi v12, v8, 15 +; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vadd.vv v8, v8, v12 ; CHECK-NEXT: ret %vc = sdiv %va, splat (i16 -7) @@ -497,8 +497,8 @@ define @vdiv_vi_nxv32i16_0( %va) { ; CHECK-NEXT: addi a0, a0, 1755 ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; CHECK-NEXT: vmulh.vx v8, v8, a0 -; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vsrl.vi 
v16, v8, 15 +; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vadd.vv v8, v8, v16 ; CHECK-NEXT: ret %vc = sdiv %va, splat (i16 -7) @@ -528,29 +528,17 @@ define @vdiv_vx_nxv1i32( %va, i32 signext % } define @vdiv_vi_nxv1i32_0( %va) { -; RV32-LABEL: vdiv_vi_nxv1i32_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 449390 -; RV32-NEXT: addi a0, a0, -1171 -; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV32-NEXT: vmulh.vx v9, v8, a0 -; RV32-NEXT: vsub.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 31 -; RV32-NEXT: vsra.vi v8, v8, 2 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vdiv_vi_nxv1i32_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 449390 -; RV64-NEXT: addi a0, a0, -1171 -; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV64-NEXT: vmulh.vx v9, v8, a0 -; RV64-NEXT: vsub.vv v8, v9, v8 -; RV64-NEXT: vsra.vi v8, v8, 2 -; RV64-NEXT: vsrl.vi v9, v8, 31 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vdiv_vi_nxv1i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 449390 +; CHECK-NEXT: addi a0, a0, -1171 +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmulh.vx v9, v8, a0 +; CHECK-NEXT: vsub.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 31 +; CHECK-NEXT: vsra.vi v8, v8, 2 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret %vc = sdiv %va, splat (i32 -7) ret %vc } @@ -578,29 +566,17 @@ define @vdiv_vx_nxv2i32( %va, i32 signext % } define @vdiv_vi_nxv2i32_0( %va) { -; RV32-LABEL: vdiv_vi_nxv2i32_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 449390 -; RV32-NEXT: addi a0, a0, -1171 -; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV32-NEXT: vmulh.vx v9, v8, a0 -; RV32-NEXT: vsub.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 31 -; RV32-NEXT: vsra.vi v8, v8, 2 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vdiv_vi_nxv2i32_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 449390 -; RV64-NEXT: addi a0, a0, -1171 -; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV64-NEXT: vmulh.vx v9, v8, a0 -; RV64-NEXT: vsub.vv v8, v9, v8 -; RV64-NEXT: vsra.vi v8, v8, 2 -; RV64-NEXT: vsrl.vi v9, v8, 31 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vdiv_vi_nxv2i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 449390 +; CHECK-NEXT: addi a0, a0, -1171 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vmulh.vx v9, v8, a0 +; CHECK-NEXT: vsub.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 31 +; CHECK-NEXT: vsra.vi v8, v8, 2 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret %vc = sdiv %va, splat (i32 -7) ret %vc } @@ -628,29 +604,17 @@ define @vdiv_vx_nxv4i32( %va, i32 signext % } define @vdiv_vi_nxv4i32_0( %va) { -; RV32-LABEL: vdiv_vi_nxv4i32_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 449390 -; RV32-NEXT: addi a0, a0, -1171 -; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV32-NEXT: vmulh.vx v10, v8, a0 -; RV32-NEXT: vsub.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 31 -; RV32-NEXT: vsra.vi v8, v8, 2 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: vdiv_vi_nxv4i32_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 449390 -; RV64-NEXT: addi a0, a0, -1171 -; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV64-NEXT: vmulh.vx v10, v8, a0 -; RV64-NEXT: vsub.vv v8, v10, v8 -; RV64-NEXT: vsra.vi v8, v8, 2 -; RV64-NEXT: vsrl.vi v10, v8, 31 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: ret +; CHECK-LABEL: vdiv_vi_nxv4i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 449390 +; CHECK-NEXT: addi a0, a0, -1171 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vmulh.vx v10, v8, a0 +; CHECK-NEXT: vsub.vv v8, v10, 
v8 +; CHECK-NEXT: vsrl.vi v10, v8, 31 +; CHECK-NEXT: vsra.vi v8, v8, 2 +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: ret %vc = sdiv %va, splat (i32 -7) ret %vc } @@ -678,29 +642,17 @@ define @vdiv_vx_nxv8i32( %va, i32 signext % } define @vdiv_vi_nxv8i32_0( %va) { -; RV32-LABEL: vdiv_vi_nxv8i32_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 449390 -; RV32-NEXT: addi a0, a0, -1171 -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vmulh.vx v12, v8, a0 -; RV32-NEXT: vsub.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 31 -; RV32-NEXT: vsra.vi v8, v8, 2 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: ret -; -; RV64-LABEL: vdiv_vi_nxv8i32_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 449390 -; RV64-NEXT: addi a0, a0, -1171 -; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV64-NEXT: vmulh.vx v12, v8, a0 -; RV64-NEXT: vsub.vv v8, v12, v8 -; RV64-NEXT: vsra.vi v8, v8, 2 -; RV64-NEXT: vsrl.vi v12, v8, 31 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: ret +; CHECK-LABEL: vdiv_vi_nxv8i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 449390 +; CHECK-NEXT: addi a0, a0, -1171 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vmulh.vx v12, v8, a0 +; CHECK-NEXT: vsub.vv v8, v12, v8 +; CHECK-NEXT: vsrl.vi v12, v8, 31 +; CHECK-NEXT: vsra.vi v8, v8, 2 +; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: ret %vc = sdiv %va, splat (i32 -7) ret %vc } @@ -728,29 +680,17 @@ define @vdiv_vx_nxv16i32( %va, i32 signex } define @vdiv_vi_nxv16i32_0( %va) { -; RV32-LABEL: vdiv_vi_nxv16i32_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 449390 -; RV32-NEXT: addi a0, a0, -1171 -; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV32-NEXT: vmulh.vx v16, v8, a0 -; RV32-NEXT: vsub.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 31 -; RV32-NEXT: vsra.vi v8, v8, 2 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: ret -; -; RV64-LABEL: vdiv_vi_nxv16i32_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 449390 -; RV64-NEXT: addi a0, a0, -1171 -; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV64-NEXT: vmulh.vx v16, v8, a0 -; RV64-NEXT: vsub.vv v8, v16, v8 -; RV64-NEXT: vsra.vi v8, v8, 2 -; RV64-NEXT: vsrl.vi v16, v8, 31 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: ret +; CHECK-LABEL: vdiv_vi_nxv16i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 449390 +; CHECK-NEXT: addi a0, a0, -1171 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vmulh.vx v16, v8, a0 +; CHECK-NEXT: vsub.vv v8, v16, v8 +; CHECK-NEXT: vsrl.vi v16, v8, 31 +; CHECK-NEXT: vsra.vi v8, v8, 2 +; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: ret %vc = sdiv %va, splat (i32 -7) ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll index 3fd7f5be860cf..c0c9b1797f91f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll @@ -48,18 +48,11 @@ define @vmulhu_vi_nxv1i32_0( %va) { } define @vmulhu_vi_nxv1i32_1( %va) { -; RV32-LABEL: vmulhu_vi_nxv1i32_1: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v8, v8, 28 -; RV32-NEXT: ret -; -; RV64-LABEL: vmulhu_vi_nxv1i32_1: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 16 -; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: ret +; CHECK-LABEL: vmulhu_vi_nxv1i32_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v8, v8, 28 +; CHECK-NEXT: ret %vb = zext splat (i32 16) to %vc = zext %va to %vd = mul %vb, %vc @@ -114,18 +107,11 @@ define @vmulhu_vi_nxv2i32_0( %va) { } define 
@vmulhu_vi_nxv2i32_1( %va) { -; RV32-LABEL: vmulhu_vi_nxv2i32_1: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; RV32-NEXT: vsrl.vi v8, v8, 28 -; RV32-NEXT: ret -; -; RV64-LABEL: vmulhu_vi_nxv2i32_1: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 16 -; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: ret +; CHECK-LABEL: vmulhu_vi_nxv2i32_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v8, v8, 28 +; CHECK-NEXT: ret %vb = zext splat (i32 16) to %vc = zext %va to %vd = mul %vb, %vc @@ -180,18 +166,11 @@ define @vmulhu_vi_nxv4i32_0( %va) { } define @vmulhu_vi_nxv4i32_1( %va) { -; RV32-LABEL: vmulhu_vi_nxv4i32_1: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v8, v8, 28 -; RV32-NEXT: ret -; -; RV64-LABEL: vmulhu_vi_nxv4i32_1: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 16 -; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: ret +; CHECK-LABEL: vmulhu_vi_nxv4i32_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsrl.vi v8, v8, 28 +; CHECK-NEXT: ret %vb = zext splat (i32 16) to %vc = zext %va to %vd = mul %vb, %vc @@ -246,18 +225,11 @@ define @vmulhu_vi_nxv8i32_0( %va) { } define @vmulhu_vi_nxv8i32_1( %va) { -; RV32-LABEL: vmulhu_vi_nxv8i32_1: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v8, v8, 28 -; RV32-NEXT: ret -; -; RV64-LABEL: vmulhu_vi_nxv8i32_1: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 16 -; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: ret +; CHECK-LABEL: vmulhu_vi_nxv8i32_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsrl.vi v8, v8, 28 +; CHECK-NEXT: ret %vb = zext splat (i32 16) to %vc = zext %va to %vd = mul %vb, %vc @@ -265,3 +237,6 @@ define @vmulhu_vi_nxv8i32_1( %va) { %vf = trunc %ve to ret %vf } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll index 9c21a626478e3..e3fad19bcb04c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll @@ -33,8 +33,8 @@ define @vrem_vi_nxv1i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmulh.vx v9, v8, a0 ; CHECK-NEXT: vsub.vv v9, v9, v8 -; CHECK-NEXT: vsra.vi v9, v9, 2 ; CHECK-NEXT: vsrl.vi v10, v9, 7 +; CHECK-NEXT: vsra.vi v9, v9, 2 ; CHECK-NEXT: vadd.vv v9, v9, v10 ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v9 @@ -85,8 +85,8 @@ define @vrem_vi_nxv2i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmulh.vx v9, v8, a0 ; CHECK-NEXT: vsub.vv v9, v9, v8 -; CHECK-NEXT: vsra.vi v9, v9, 2 ; CHECK-NEXT: vsrl.vi v10, v9, 7 +; CHECK-NEXT: vsra.vi v9, v9, 2 ; CHECK-NEXT: vadd.vv v9, v9, v10 ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v9 @@ -137,8 +137,8 @@ define @vrem_vi_nxv4i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmulh.vx v9, v8, a0 ; CHECK-NEXT: vsub.vv v9, v9, v8 -; CHECK-NEXT: vsra.vi v9, v9, 2 ; CHECK-NEXT: vsrl.vi v10, v9, 7 +; CHECK-NEXT: vsra.vi v9, v9, 2 ; CHECK-NEXT: vadd.vv v9, v9, v10 ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v9 @@ -189,8 +189,8 @@ define @vrem_vi_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vmulh.vx v9, v8, a0 ; CHECK-NEXT: vsub.vv v9, v9, v8 -; CHECK-NEXT: vsra.vi v9, v9, 2 ; CHECK-NEXT: vsrl.vi v10, v9, 7 +; CHECK-NEXT: vsra.vi v9, v9, 2 ; CHECK-NEXT: vadd.vv v9, v9, v10 ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v9 @@ -241,8 +241,8 @@ define @vrem_vi_nxv16i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vmulh.vx v10, v8, a0 ; CHECK-NEXT: vsub.vv v10, v10, v8 -; CHECK-NEXT: vsra.vi v10, v10, 2 ; CHECK-NEXT: vsrl.vi v12, v10, 7 +; CHECK-NEXT: vsra.vi v10, v10, 2 ; CHECK-NEXT: vadd.vv v10, v10, v12 ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v10 @@ -293,8 +293,8 @@ define @vrem_vi_nxv32i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: vmulh.vx v12, v8, a0 ; CHECK-NEXT: vsub.vv v12, v12, v8 -; CHECK-NEXT: vsra.vi v12, v12, 2 ; CHECK-NEXT: vsrl.vi v16, v12, 7 +; CHECK-NEXT: vsra.vi v12, v12, 2 ; CHECK-NEXT: vadd.vv v12, v12, v16 ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v12 @@ -345,8 +345,8 @@ define @vrem_vi_nxv64i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vmulh.vx v16, v8, a0 ; CHECK-NEXT: vsub.vv v16, v16, v8 -; CHECK-NEXT: vsra.vi v16, v16, 2 ; CHECK-NEXT: vsrl.vi v24, v16, 7 +; CHECK-NEXT: vsra.vi v16, v16, 2 ; CHECK-NEXT: vadd.vv v16, v16, v24 ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v16 @@ -384,8 +384,8 @@ define @vrem_vi_nxv1i16_0( %va) { ; CHECK-NEXT: addi a0, a0, 1755 ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmulh.vx v9, v8, a0 -; CHECK-NEXT: vsra.vi v9, v9, 1 ; CHECK-NEXT: vsrl.vi v10, v9, 15 +; CHECK-NEXT: vsra.vi v9, v9, 1 ; CHECK-NEXT: vadd.vv v9, v9, v10 ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v9 @@ -436,8 +436,8 @@ define @vrem_vi_nxv2i16_0( %va) { ; CHECK-NEXT: addi a0, a0, 1755 ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmulh.vx v9, v8, a0 -; CHECK-NEXT: vsra.vi v9, v9, 1 ; CHECK-NEXT: vsrl.vi v10, v9, 15 +; CHECK-NEXT: vsra.vi v9, v9, 1 ; CHECK-NEXT: vadd.vv v9, v9, v10 ; CHECK-NEXT: li a0, -7 ; 
CHECK-NEXT: vnmsac.vx v8, a0, v9 @@ -488,8 +488,8 @@ define @vrem_vi_nxv4i16_0( %va) { ; CHECK-NEXT: addi a0, a0, 1755 ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vmulh.vx v9, v8, a0 -; CHECK-NEXT: vsra.vi v9, v9, 1 ; CHECK-NEXT: vsrl.vi v10, v9, 15 +; CHECK-NEXT: vsra.vi v9, v9, 1 ; CHECK-NEXT: vadd.vv v9, v9, v10 ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v9 @@ -540,8 +540,8 @@ define @vrem_vi_nxv8i16_0( %va) { ; CHECK-NEXT: addi a0, a0, 1755 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vmulh.vx v10, v8, a0 -; CHECK-NEXT: vsra.vi v10, v10, 1 ; CHECK-NEXT: vsrl.vi v12, v10, 15 +; CHECK-NEXT: vsra.vi v10, v10, 1 ; CHECK-NEXT: vadd.vv v10, v10, v12 ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v10 @@ -592,8 +592,8 @@ define @vrem_vi_nxv16i16_0( %va) { ; CHECK-NEXT: addi a0, a0, 1755 ; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vmulh.vx v12, v8, a0 -; CHECK-NEXT: vsra.vi v12, v12, 1 ; CHECK-NEXT: vsrl.vi v16, v12, 15 +; CHECK-NEXT: vsra.vi v12, v12, 1 ; CHECK-NEXT: vadd.vv v12, v12, v16 ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v12 @@ -644,8 +644,8 @@ define @vrem_vi_nxv32i16_0( %va) { ; CHECK-NEXT: addi a0, a0, 1755 ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; CHECK-NEXT: vmulh.vx v16, v8, a0 -; CHECK-NEXT: vsra.vi v16, v16, 1 ; CHECK-NEXT: vsrl.vi v24, v16, 15 +; CHECK-NEXT: vsra.vi v16, v16, 1 ; CHECK-NEXT: vadd.vv v16, v16, v24 ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v16 @@ -677,33 +677,19 @@ define @vrem_vx_nxv1i32( %va, i32 signext % } define @vrem_vi_nxv1i32_0( %va) { -; RV32-LABEL: vrem_vi_nxv1i32_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 449390 -; RV32-NEXT: addi a0, a0, -1171 -; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV32-NEXT: vmulh.vx v9, v8, a0 -; RV32-NEXT: vsub.vv v9, v9, v8 -; RV32-NEXT: vsrl.vi v10, v9, 31 -; RV32-NEXT: vsra.vi v9, v9, 2 -; RV32-NEXT: vadd.vv v9, v9, v10 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vrem_vi_nxv1i32_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 449390 -; RV64-NEXT: addi a0, a0, -1171 -; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV64-NEXT: vmulh.vx v9, v8, a0 -; RV64-NEXT: vsub.vv v9, v9, v8 -; RV64-NEXT: vsra.vi v9, v9, 2 -; RV64-NEXT: vsrl.vi v10, v9, 31 -; RV64-NEXT: vadd.vv v9, v9, v10 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vrem_vi_nxv1i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 449390 +; CHECK-NEXT: addi a0, a0, -1171 +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmulh.vx v9, v8, a0 +; CHECK-NEXT: vsub.vv v9, v9, v8 +; CHECK-NEXT: vsrl.vi v10, v9, 31 +; CHECK-NEXT: vsra.vi v9, v9, 2 +; CHECK-NEXT: vadd.vv v9, v9, v10 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v9 +; CHECK-NEXT: ret %vc = srem %va, splat (i32 -7) ret %vc } @@ -731,33 +717,19 @@ define @vrem_vx_nxv2i32( %va, i32 signext % } define @vrem_vi_nxv2i32_0( %va) { -; RV32-LABEL: vrem_vi_nxv2i32_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 449390 -; RV32-NEXT: addi a0, a0, -1171 -; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV32-NEXT: vmulh.vx v9, v8, a0 -; RV32-NEXT: vsub.vv v9, v9, v8 -; RV32-NEXT: vsrl.vi v10, v9, 31 -; RV32-NEXT: vsra.vi v9, v9, 2 -; RV32-NEXT: vadd.vv v9, v9, v10 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vrem_vi_nxv2i32_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 449390 -; RV64-NEXT: addi a0, a0, -1171 -; RV64-NEXT: vsetvli a1, zero, e32, m1, 
ta, ma -; RV64-NEXT: vmulh.vx v9, v8, a0 -; RV64-NEXT: vsub.vv v9, v9, v8 -; RV64-NEXT: vsra.vi v9, v9, 2 -; RV64-NEXT: vsrl.vi v10, v9, 31 -; RV64-NEXT: vadd.vv v9, v9, v10 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vrem_vi_nxv2i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 449390 +; CHECK-NEXT: addi a0, a0, -1171 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vmulh.vx v9, v8, a0 +; CHECK-NEXT: vsub.vv v9, v9, v8 +; CHECK-NEXT: vsrl.vi v10, v9, 31 +; CHECK-NEXT: vsra.vi v9, v9, 2 +; CHECK-NEXT: vadd.vv v9, v9, v10 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v9 +; CHECK-NEXT: ret %vc = srem %va, splat (i32 -7) ret %vc } @@ -785,33 +757,19 @@ define @vrem_vx_nxv4i32( %va, i32 signext % } define @vrem_vi_nxv4i32_0( %va) { -; RV32-LABEL: vrem_vi_nxv4i32_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 449390 -; RV32-NEXT: addi a0, a0, -1171 -; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV32-NEXT: vmulh.vx v10, v8, a0 -; RV32-NEXT: vsub.vv v10, v10, v8 -; RV32-NEXT: vsrl.vi v12, v10, 31 -; RV32-NEXT: vsra.vi v10, v10, 2 -; RV32-NEXT: vadd.vv v10, v10, v12 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: vrem_vi_nxv4i32_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 449390 -; RV64-NEXT: addi a0, a0, -1171 -; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV64-NEXT: vmulh.vx v10, v8, a0 -; RV64-NEXT: vsub.vv v10, v10, v8 -; RV64-NEXT: vsra.vi v10, v10, 2 -; RV64-NEXT: vsrl.vi v12, v10, 31 -; RV64-NEXT: vadd.vv v10, v10, v12 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v10 -; RV64-NEXT: ret +; CHECK-LABEL: vrem_vi_nxv4i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 449390 +; CHECK-NEXT: addi a0, a0, -1171 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vmulh.vx v10, v8, a0 +; CHECK-NEXT: vsub.vv v10, v10, v8 +; CHECK-NEXT: vsrl.vi v12, v10, 31 +; CHECK-NEXT: vsra.vi v10, v10, 2 +; CHECK-NEXT: vadd.vv v10, v10, v12 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v10 +; CHECK-NEXT: ret %vc = srem %va, splat (i32 -7) ret %vc } @@ -839,33 +797,19 @@ define @vrem_vx_nxv8i32( %va, i32 signext % } define @vrem_vi_nxv8i32_0( %va) { -; RV32-LABEL: vrem_vi_nxv8i32_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 449390 -; RV32-NEXT: addi a0, a0, -1171 -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vmulh.vx v12, v8, a0 -; RV32-NEXT: vsub.vv v12, v12, v8 -; RV32-NEXT: vsrl.vi v16, v12, 31 -; RV32-NEXT: vsra.vi v12, v12, 2 -; RV32-NEXT: vadd.vv v12, v12, v16 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v12 -; RV32-NEXT: ret -; -; RV64-LABEL: vrem_vi_nxv8i32_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 449390 -; RV64-NEXT: addi a0, a0, -1171 -; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV64-NEXT: vmulh.vx v12, v8, a0 -; RV64-NEXT: vsub.vv v12, v12, v8 -; RV64-NEXT: vsra.vi v12, v12, 2 -; RV64-NEXT: vsrl.vi v16, v12, 31 -; RV64-NEXT: vadd.vv v12, v12, v16 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v12 -; RV64-NEXT: ret +; CHECK-LABEL: vrem_vi_nxv8i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 449390 +; CHECK-NEXT: addi a0, a0, -1171 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vmulh.vx v12, v8, a0 +; CHECK-NEXT: vsub.vv v12, v12, v8 +; CHECK-NEXT: vsrl.vi v16, v12, 31 +; CHECK-NEXT: vsra.vi v12, v12, 2 +; CHECK-NEXT: vadd.vv v12, v12, v16 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v12 +; CHECK-NEXT: ret %vc = srem %va, splat (i32 -7) ret %vc } @@ -893,33 +837,19 @@ define 
@vrem_vx_nxv16i32( %va, i32 signex } define @vrem_vi_nxv16i32_0( %va) { -; RV32-LABEL: vrem_vi_nxv16i32_0: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 449390 -; RV32-NEXT: addi a0, a0, -1171 -; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV32-NEXT: vmulh.vx v16, v8, a0 -; RV32-NEXT: vsub.vv v16, v16, v8 -; RV32-NEXT: vsrl.vi v24, v16, 31 -; RV32-NEXT: vsra.vi v16, v16, 2 -; RV32-NEXT: vadd.vv v16, v16, v24 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v16 -; RV32-NEXT: ret -; -; RV64-LABEL: vrem_vi_nxv16i32_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 449390 -; RV64-NEXT: addi a0, a0, -1171 -; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV64-NEXT: vmulh.vx v16, v8, a0 -; RV64-NEXT: vsub.vv v16, v16, v8 -; RV64-NEXT: vsra.vi v16, v16, 2 -; RV64-NEXT: vsrl.vi v24, v16, 31 -; RV64-NEXT: vadd.vv v16, v16, v24 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v16 -; RV64-NEXT: ret +; CHECK-LABEL: vrem_vi_nxv16i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 449390 +; CHECK-NEXT: addi a0, a0, -1171 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vmulh.vx v16, v8, a0 +; CHECK-NEXT: vsub.vv v16, v16, v8 +; CHECK-NEXT: vsrl.vi v24, v16, 31 +; CHECK-NEXT: vsra.vi v16, v16, 2 +; CHECK-NEXT: vadd.vv v16, v16, v24 +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vnmsac.vx v8, a0, v16 +; CHECK-NEXT: ret %vc = srem %va, splat (i32 -7) ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll index e471f4b2e92b5..65f847f562d75 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll @@ -15,7 +15,7 @@ define @vsadd_vx_nxv8i7( %a, i7 signext %b, < ; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t ; CHECK-NEXT: li a0, 63 ; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t -; CHECK-NEXT: li a0, 192 +; CHECK-NEXT: li a0, -64 ; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i7 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll index ebf8d5eeb40bc..b98da42697753 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll @@ -15,7 +15,7 @@ define @vssub_vx_nxv8i7( %a, i7 signext %b, < ; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t ; CHECK-NEXT: li a0, 63 ; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t -; CHECK-NEXT: li a0, 192 +; CHECK-NEXT: li a0, -64 ; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i7 %b, i32 0 diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll index 721ffbe1ceb79..c30688e9fb33d 100644 --- a/llvm/test/CodeGen/X86/avx512-select.ll +++ b/llvm/test/CodeGen/X86/avx512-select.ll @@ -743,7 +743,8 @@ define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) { ; X86-AVX512F-LABEL: julia_issue36955: ; X86-AVX512F: # %bb.0: ; X86-AVX512F-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; X86-AVX512F-NEXT: vcmplepd %zmm0, %zmm1, %k0 +; X86-AVX512F-NEXT: vcmpnlepd %zmm0, %zmm1, %k0 +; X86-AVX512F-NEXT: knotw %k0, %k0 ; X86-AVX512F-NEXT: kmovw %k0, %eax ; X86-AVX512F-NEXT: # kill: def $al killed $al killed $eax ; X86-AVX512F-NEXT: vzeroupper @@ -752,7 +753,8 @@ define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) { ; X64-AVX512F-LABEL: julia_issue36955: ; X64-AVX512F: # %bb.0: ; X64-AVX512F-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; X64-AVX512F-NEXT: vcmplepd %zmm0, %zmm1, %k0 +; X64-AVX512F-NEXT: vcmpnlepd %zmm0, %zmm1, %k0 +; X64-AVX512F-NEXT: knotw %k0, %k0 ; X64-AVX512F-NEXT: kmovw %k0, %eax ; X64-AVX512F-NEXT: # kill: def $al 
killed $al killed $eax ; X64-AVX512F-NEXT: vzeroupper @@ -761,7 +763,8 @@ define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) { ; X86-AVX512BW-LABEL: julia_issue36955: ; X86-AVX512BW: # %bb.0: ; X86-AVX512BW-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; X86-AVX512BW-NEXT: vcmplepd %zmm0, %zmm1, %k0 +; X86-AVX512BW-NEXT: vcmpnlepd %zmm0, %zmm1, %k0 +; X86-AVX512BW-NEXT: knotw %k0, %k0 ; X86-AVX512BW-NEXT: kmovd %k0, %eax ; X86-AVX512BW-NEXT: # kill: def $al killed $al killed $eax ; X86-AVX512BW-NEXT: vzeroupper @@ -770,7 +773,8 @@ define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) { ; X64-AVX512BW-LABEL: julia_issue36955: ; X64-AVX512BW: # %bb.0: ; X64-AVX512BW-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; X64-AVX512BW-NEXT: vcmplepd %zmm0, %zmm1, %k0 +; X64-AVX512BW-NEXT: vcmpnlepd %zmm0, %zmm1, %k0 +; X64-AVX512BW-NEXT: knotw %k0, %k0 ; X64-AVX512BW-NEXT: kmovd %k0, %eax ; X64-AVX512BW-NEXT: # kill: def $al killed $al killed $eax ; X64-AVX512BW-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vselect-zero.ll b/llvm/test/CodeGen/X86/vselect-zero.ll index b3bb01137c70d..9a72bdb1d41f4 100644 --- a/llvm/test/CodeGen/X86/vselect-zero.ll +++ b/llvm/test/CodeGen/X86/vselect-zero.ll @@ -56,7 +56,8 @@ define <4 x i32> @test2(<4 x float> %a, <4 x float> %b, <4 x i32> %x) { ; ; AVX512-LABEL: test2: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpneqps %xmm1, %xmm0, %k1 +; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0 +; AVX512-NEXT: knotw %k0, %k1 ; AVX512-NEXT: vmovdqa32 %xmm2, %xmm0 {%k1} {z} ; AVX512-NEXT: retq %cond = fcmp oeq <4 x float> %a, %b diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll index 7cddebdca5cca..d73fd876649fa 100644 --- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll +++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll @@ -478,7 +478,8 @@ define <16 x i1> @interleaved_load_vf16_i8_stride4(ptr %ptr) nounwind { ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7] ; AVX1-NEXT: vpcmpeqb %xmm0, %xmm5, %xmm0 ; AVX1-NEXT: vpxor %xmm0, %xmm2, %xmm0 -; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: interleaved_load_vf16_i8_stride4: @@ -517,7 +518,8 @@ define <16 x i1> @interleaved_load_vf16_i8_stride4(ptr %ptr) nounwind { ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3] ; AVX2-NEXT: vpcmpeqb %xmm0, %xmm3, %xmm0 ; AVX2-NEXT: vpxor %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -633,7 +635,9 @@ define <32 x i1> @interleaved_load_vf32_i8_stride4(ptr %ptr) nounwind { ; AVX1-NEXT: vinsertf128 $1, %xmm9, %ymm8, %ymm2 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: vxorps %ymm0, %ymm2, %ymm0 -; AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 +; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: interleaved_load_vf32_i8_stride4: @@ -698,7 +702,8 @@ define <32 x i1> @interleaved_load_vf32_i8_stride4(ptr %ptr) nounwind { ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm3[4,5,6,7] ; AVX2-NEXT: vpcmpeqb %ymm0, %ymm6, %ymm0 ; AVX2-NEXT: vpxor %ymm0, %ymm5, %ymm0 -; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vpxor 
%ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; AVX512-LABEL: interleaved_load_vf32_i8_stride4: