diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f5334e8c63964a..3bdf15b08e0f31 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22755,16 +22755,22 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
 
 /// Transform a vector binary operation into a scalar binary operation by moving
 /// the math/logic after an extract element of a vector.
-static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
-                                       const SDLoc &DL, bool LegalOperations) {
+static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
+                                       const SDLoc &DL, bool LegalTypes) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue Vec = ExtElt->getOperand(0);
   SDValue Index = ExtElt->getOperand(1);
   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
-  if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
+  unsigned Opc = Vec.getOpcode();
+  if (!IndexC || !Vec.hasOneUse() || (!TLI.isBinOp(Opc) && Opc != ISD::SETCC) ||
       Vec->getNumValues() != 1)
     return SDValue();
 
+  EVT ResVT = ExtElt->getValueType(0);
+  if (Opc == ISD::SETCC &&
+      (ResVT != Vec.getValueType().getVectorElementType() || LegalTypes))
+    return SDValue();
+
   // Targets may want to avoid this to prevent an expensive register transfer.
   if (!TLI.shouldScalarizeBinop(Vec))
     return SDValue();
@@ -22775,19 +22781,24 @@ static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
   SDValue Op0 = Vec.getOperand(0);
   SDValue Op1 = Vec.getOperand(1);
   APInt SplatVal;
-  if (isAnyConstantBuildVector(Op0, true) ||
-      ISD::isConstantSplatVector(Op0.getNode(), SplatVal) ||
-      isAnyConstantBuildVector(Op1, true) ||
-      ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) {
-    // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
-    // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
-    EVT VT = ExtElt->getValueType(0);
-    SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
-    SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
-    return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
-  }
+  if (!isAnyConstantBuildVector(Op0, true) &&
+      !ISD::isConstantSplatVector(Op0.getNode(), SplatVal) &&
+      !isAnyConstantBuildVector(Op1, true) &&
+      !ISD::isConstantSplatVector(Op1.getNode(), SplatVal))
+    return SDValue();
 
-  return SDValue();
+  // extractelt (op X, C), IndexC --> op (extractelt X, IndexC), C'
+  // extractelt (op C, X), IndexC --> op C', (extractelt X, IndexC)
+  if (Opc == ISD::SETCC) {
+    EVT OpVT = Op0.getValueType().getVectorElementType();
+    Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op0, Index);
+    Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op1, Index);
+    return DAG.getSetCC(DL, ResVT, Op0, Op1,
+                        cast<CondCodeSDNode>(Vec->getOperand(2))->get());
+  }
+  Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op0, Index);
+  Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op1, Index);
+  return DAG.getNode(Opc, DL, ResVT, Op0, Op1);
 }
 
 // Given a ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
@@ -23020,7 +23031,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     }
   }
 
-  if (SDValue BO = scalarizeExtractedBinop(N, DAG, DL, LegalOperations))
+  if (SDValue BO = scalarizeExtractedBinOp(N, DAG, DL, LegalTypes))
     return BO;
 
   if (VecVT.isScalableVector())
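For readers following the combine above, its net effect on IR like the following is to fold the lane extract into the compare, so no vector compare or lane move is materialised (a minimal hand-written sketch, not one of the patch's tests):

  define i1 @sketch_extract_of_setcc(<4 x i32> %x) {
    ; Before the combine: vector compare, then extract lane 1 of the mask.
    %cmp = icmp ult <4 x i32> %x, splat (i32 5)
    %bit = extractelement <4 x i1> %cmp, i32 1
    ret i1 %bit
  }
  ; After the combine the DAG is equivalent to the scalar form:
  ;   %lane = extractelement <4 x i32> %x, i32 1
  ;   %bit  = icmp ult i32 %lane, 5

The LegalTypes bail-out matters because the new SETCC path can create a scalar setcc with an illegal result type; restricting the combine to before type legalisation leaves such nodes to the expansion added below.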
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 986d69e6c7a9e0..69f25ebc88004e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2835,6 +2835,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::SELECT_CC:   SplitRes_SELECT_CC(N, Lo, Hi); break;
   case ISD::UNDEF:       SplitRes_UNDEF(N, Lo, Hi); break;
   case ISD::FREEZE:      SplitRes_FREEZE(N, Lo, Hi); break;
+  case ISD::SETCC:       ExpandIntRes_SETCC(N, Lo, Hi); break;
 
   case ISD::BITCAST:            ExpandRes_BITCAST(N, Lo, Hi); break;
   case ISD::BUILD_PAIR:         ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
@@ -3316,6 +3317,20 @@ static std::pair<unsigned, unsigned> getExpandedMinMaxOps(int Op) {
   }
 }
 
+void DAGTypeLegalizer::ExpandIntRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  SDLoc DL(N);
+
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  EVT NewVT = getSetCCResultType(LHS.getValueType());
+
+  // Taking the same approach as ScalarizeVecRes_SETCC
+  SDValue Res = DAG.getNode(ISD::SETCC, DL, NewVT, LHS, RHS, N->getOperand(2));
+
+  Res = DAG.getBoolExtOrTrunc(Res, DL, N->getValueType(0), NewVT);
+  SplitInteger(Res, Lo, Hi);
+}
+
 void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
                                            SDValue &Lo, SDValue &Hi) {
   SDLoc DL(N);
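The new ExpandIntRes_SETCC handles a setcc whose scalar integer result is wider than any legal type. A minimal IR sketch of a situation that appears to reach it on AArch64 (my reading of the path, simplified; the patch's extract_icmp_v1i128 test exercises the same code via a load):

  define i128 @sketch_setcc_expansion(<1 x i128> %v) {
    ; The sext is assumed to fold into the compare, giving a setcc with a
    ; <1 x i128> mask result; scalarising the extract then leaves a setcc
    ; producing a bare i128, which integer type expansion must split.
    %cmp = icmp eq <1 x i128> %v, zeroinitializer
    %ext = sext <1 x i1> %cmp to <1 x i128>
    %res = extractelement <1 x i128> %ext, i32 0
    ret i128 %res
  }
  ; ExpandIntRes_SETCC re-emits the compare in getSetCCResultType (e.g. i32),
  ; bool-extends that to i128, and splits the result into two i64 halves.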
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 1703149aca7463..571a710cc92a34 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -487,6 +487,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void ExpandIntRes_MINMAX            (SDNode *N, SDValue &Lo, SDValue &Hi);
 
   void ExpandIntRes_CMP               (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_SETCC             (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_SADDSUBO          (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_UADDSUBO          (SDNode *N, SDValue &Lo, SDValue &Hi);
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index cb0b9e965277aa..d51b36f7e49946 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1348,6 +1348,10 @@ class AArch64TargetLowering : public TargetLowering {
   unsigned getMinimumJumpTableEntries() const override;
 
   bool softPromoteHalfType() const override { return true; }
+
+  bool shouldScalarizeBinop(SDValue VecOp) const override {
+    return VecOp.getOpcode() == ISD::SETCC;
+  }
 };
 
 namespace AArch64 {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5cd3a3f1c32e66..40c2a68e40853d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2107,7 +2107,7 @@ bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
 
   // Assume target opcodes can't be scalarized.
   // TODO - do we have any exceptions?
-  if (Opc >= ISD::BUILTIN_OP_END)
+  if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
     return false;
 
   // If the vector op is not supported, try to convert to scalar.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index c765d2b1ab95bc..7712570869ff6c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -429,7 +429,7 @@ bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
 
   // Assume target opcodes can't be scalarized.
   // TODO - do we have any exceptions?
-  if (Opc >= ISD::BUILTIN_OP_END)
+  if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
     return false;
 
   // If the vector op is not supported, try to convert to scalar.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 59b730d52a4b3e..db9d5879066ba6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3306,7 +3306,7 @@ bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
 
   // Assume target opcodes can't be scalarized.
   // TODO - do we have any exceptions?
-  if (Opc >= ISD::BUILTIN_OP_END)
+  if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
     return false;
 
   // If the vector op is not supported, try to convert to scalar.
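The target hook changes are complementary: AArch64 opts in to the new combine for compares only, while the pre-existing RISC-V, WebAssembly and X86 overrides gain an explicit !isBinOp(Opc) bail-out, because shouldScalarizeBinop can now be queried with ISD::SETCC (a three-operand node) and the legality checks that follow in those hooks were written for two-operand arithmetic opcodes. A minimal sketch of IR that now reaches these hooks with a compare (hand-written for illustration):

  define i1 @sketch_hook_query(<4 x i32> %x) {
    ; On AArch64 this becomes a scalar compare of lane 3; on targets
    ; whose hook rejects SETCC the vector compare is kept as before.
    %cmp = icmp eq <4 x i32> %x, splat (i32 42)
    %bit = extractelement <4 x i1> %cmp, i32 3
    ret i1 %bit
  }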
diff --git a/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll b/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll
index 5a5dee0b53d439..4cb1d5b2fb345d 100644
--- a/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll
+++ b/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll
@@ -5,7 +5,7 @@
 
 declare void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8>, <vscale x 16 x ptr>, i32 immarg, <vscale x 16 x i1>)
 
-define fastcc i8 @allocno_reload_assign() {
+define fastcc i8 @allocno_reload_assign(ptr %p) {
 ; CHECK-LABEL: allocno_reload_assign:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov d0, xzr
@@ -14,8 +14,8 @@ define fastcc i8 @allocno_reload_assign() {
 ; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, #0
 ; CHECK-NEXT:    uzp1 p0.s, p0.s, p0.s
 ; CHECK-NEXT:    uzp1 p0.h, p0.h, p0.h
-; CHECK-NEXT:    uzp1 p0.b, p0.b, p0.b
-; CHECK-NEXT:    mov z0.b, p0/z, #1 // =0x1
+; CHECK-NEXT:    uzp1 p8.b, p0.b, p0.b
+; CHECK-NEXT:    mov z0.b, p8/z, #1 // =0x1
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    mov z0.b, #0 // =0x0
 ; CHECK-NEXT:    uunpklo z1.h, z0.b
@@ -30,34 +30,35 @@ define fastcc i8 @allocno_reload_assign() {
 ; CHECK-NEXT:    punpklo p1.h, p0.b
 ; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:    punpklo p2.h, p1.b
-; CHECK-NEXT:    punpkhi p3.h, p1.b
+; CHECK-NEXT:    punpkhi p4.h, p1.b
 ; CHECK-NEXT:    uunpklo z0.d, z2.s
 ; CHECK-NEXT:    uunpkhi z1.d, z2.s
-; CHECK-NEXT:    punpklo p5.h, p0.b
+; CHECK-NEXT:    punpklo p6.h, p0.b
 ; CHECK-NEXT:    uunpklo z2.d, z3.s
 ; CHECK-NEXT:    uunpkhi z3.d, z3.s
-; CHECK-NEXT:    punpkhi p7.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:    uunpklo z4.d, z5.s
 ; CHECK-NEXT:    uunpkhi z5.d, z5.s
 ; CHECK-NEXT:    uunpklo z6.d, z7.s
 ; CHECK-NEXT:    uunpkhi z7.d, z7.s
-; CHECK-NEXT:    punpklo p0.h, p2.b
-; CHECK-NEXT:    punpkhi p1.h, p2.b
-; CHECK-NEXT:    punpklo p2.h, p3.b
-; CHECK-NEXT:    punpkhi p3.h, p3.b
-; CHECK-NEXT:    punpklo p4.h, p5.b
-; CHECK-NEXT:    punpkhi p5.h, p5.b
-; CHECK-NEXT:    punpklo p6.h, p7.b
-; CHECK-NEXT:    punpkhi p7.h, p7.b
+; CHECK-NEXT:    punpklo p1.h, p2.b
+; CHECK-NEXT:    punpkhi p2.h, p2.b
+; CHECK-NEXT:    punpklo p3.h, p4.b
+; CHECK-NEXT:    punpkhi p4.h, p4.b
+; CHECK-NEXT:    punpklo p5.h, p6.b
+; CHECK-NEXT:    punpkhi p6.h, p6.b
+; CHECK-NEXT:    punpklo p7.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:  .LBB0_1: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    st1b { z0.d }, p0, [z16.d]
-; CHECK-NEXT:    st1b { z1.d }, p1, [z16.d]
-; CHECK-NEXT:    st1b { z2.d }, p2, [z16.d]
-; CHECK-NEXT:    st1b { z3.d }, p3, [z16.d]
-; CHECK-NEXT:    st1b { z4.d }, p4, [z16.d]
-; CHECK-NEXT:    st1b { z5.d }, p5, [z16.d]
-; CHECK-NEXT:    st1b { z6.d }, p6, [z16.d]
-; CHECK-NEXT:    st1b { z7.d }, p7, [z16.d]
+; CHECK-NEXT:    st1b { z0.d }, p1, [z16.d]
+; CHECK-NEXT:    st1b { z1.d }, p2, [z16.d]
+; CHECK-NEXT:    st1b { z2.d }, p3, [z16.d]
+; CHECK-NEXT:    st1b { z3.d }, p4, [z16.d]
+; CHECK-NEXT:    st1b { z4.d }, p5, [z16.d]
+; CHECK-NEXT:    st1b { z5.d }, p6, [z16.d]
+; CHECK-NEXT:    st1b { z6.d }, p7, [z16.d]
+; CHECK-NEXT:    st1b { z7.d }, p0, [z16.d]
+; CHECK-NEXT:    str p8, [x0]
 ; CHECK-NEXT:    b .LBB0_1
   br label %1
 
@@ -66,6 +67,7 @@ define fastcc i8 @allocno_reload_assign() {
   %constexpr1 = shufflevector <vscale x 16 x i1> %constexpr, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %constexpr2 = xor <vscale x 16 x i1> %constexpr1, shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer)
   call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x ptr> zeroinitializer, i32 0, <vscale x 16 x i1> %constexpr2)
+  store <vscale x 16 x i1> %constexpr, ptr %p, align 16
   br label %1
 }
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
new file mode 100644
index 00000000000000..12bd2db2297d77
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
@@ -0,0 +1,204 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mattr=+sve < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+
+define i1 @extract_icmp_v4i32_const_splat_rhs(<4 x i32> %a) {
+; CHECK-LABEL: extract_icmp_v4i32_const_splat_rhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[1]
+; CHECK-NEXT:    cmp w8, #5
+; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    ret
+  %icmp = icmp ult <4 x i32> %a, splat (i32 5)
+  %ext = extractelement <4 x i1> %icmp, i32 1
+  ret i1 %ext
+}
+
+define i1 @extract_icmp_v4i32_const_splat_lhs(<4 x i32> %a) {
+; CHECK-LABEL: extract_icmp_v4i32_const_splat_lhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[1]
+; CHECK-NEXT:    cmp w8, #7
+; CHECK-NEXT:    cset w0, hi
+; CHECK-NEXT:    ret
+  %icmp = icmp ult <4 x i32> splat(i32 7), %a
+  %ext = extractelement <4 x i1> %icmp, i32 1
+  ret i1 %ext
+}
+
+define i1 @extract_icmp_v4i32_const_vec_rhs(<4 x i32> %a) {
+; CHECK-LABEL: extract_icmp_v4i32_const_vec_rhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[1]
+; CHECK-NEXT:    cmp w8, #234
+; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    ret
+  %icmp = icmp ult <4 x i32> %a, <i32 5, i32 234, i32 -1, i32 7>
+  %ext = extractelement <4 x i1> %icmp, i32 1
+  ret i1 %ext
+}
+
+define i1 @extract_fcmp_v4f32_const_splat_rhs(<4 x float> %a) {
+; CHECK-LABEL: extract_fcmp_v4f32_const_splat_rhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov s0, v0.s[1]
+; CHECK-NEXT:    fmov s1, #4.00000000
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    cset w0, lt
+; CHECK-NEXT:    ret
+  %fcmp = fcmp ult <4 x float> %a, splat(float 4.0e+0)
+  %ext = extractelement <4 x i1> %fcmp, i32 1
+  ret i1 %ext
+}
+
+; Tests the code in ExpandIntRes_SETCC
+define i128 @extract_icmp_v1i128(ptr %p) {
+; CHECK-LABEL: extract_icmp_v1i128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp x9, x8, [x0]
+; CHECK-NEXT:    mov x1, xzr
+; CHECK-NEXT:    orr x8, x9, x8
+; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %load = load <1 x i128>, ptr %p, align 16
+  %cmp = icmp eq <1 x i128> %load, zeroinitializer
+  %sext = sext <1 x i1> %cmp to <1 x i128>
+  %res = extractelement <1 x i128> %sext, i32 0
+  ret i128 %res
+}
+
+define void @vector_loop_with_icmp(ptr nocapture noundef writeonly %dest) {
+; CHECK-LABEL: vector_loop_with_icmp:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    mov w8, #2 // =0x2
+; CHECK-NEXT:    mov w9, #16 // =0x10
+; CHECK-NEXT:    dup v1.2d, x8
+; CHECK-NEXT:    add x8, x0, #4
+; CHECK-NEXT:    mov w10, #1 // =0x1
+; CHECK-NEXT:    b .LBB5_2
+; CHECK-NEXT:  .LBB5_1: // %pred.store.continue6
+; CHECK-NEXT:    // in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    subs x9, x9, #2
+; CHECK-NEXT:    add x8, x8, #8
+; CHECK-NEXT:    b.eq .LBB5_6
+; CHECK-NEXT:  .LBB5_2: // %vector.body
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    fmov x11, d0
+; CHECK-NEXT:    cmp x11, #14
+; CHECK-NEXT:    b.hi .LBB5_4
+; CHECK-NEXT:  // %bb.3: // %pred.store.if
+; CHECK-NEXT:    // in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT:    stur w10, [x8, #-4]
+; CHECK-NEXT:  .LBB5_4: // %pred.store.continue
+; CHECK-NEXT:    // in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT:    mov x11, v0.d[1]
+; CHECK-NEXT:    cmp x11, #14
+; CHECK-NEXT:    b.hi .LBB5_1
+; CHECK-NEXT:  // %bb.5: // %pred.store.if5
+; CHECK-NEXT:    // in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT:    str w10, [x8]
+; CHECK-NEXT:    b .LBB5_1
+; CHECK-NEXT:  .LBB5_6: // %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %pred.store.continue6 ]
+  %vec.ind = phi <2 x i64> [ <i64 0, i64 1>, %entry ], [ %vec.ind.next, %pred.store.continue6 ]
+  %vec.cmp = icmp ult <2 x i64> %vec.ind, <i64 15, i64 15>
+  %c0 = extractelement <2 x i1> %vec.cmp, i64 0
+  br i1 %c0, label %pred.store.if, label %pred.store.continue
+
+pred.store.if:
+  %arrayidx = getelementptr inbounds i32, ptr %dest, i64 %index
+  store i32 1, ptr %arrayidx, align 4
+  br label %pred.store.continue
+
+pred.store.continue:
+  %c1 = extractelement <2 x i1> %vec.cmp, i64 1
+  br i1 %c1, label %pred.store.if5, label %pred.store.continue6
+
+pred.store.if5:
+  %indexp1 = or disjoint i64 %index, 1
+  %arrayidx2 = getelementptr inbounds i32, ptr %dest, i64 %indexp1
+  store i32 1, ptr %arrayidx2, align 4
+  br label %pred.store.continue6
+
+pred.store.continue6:
+  %index.next = add i64 %index, 2
+  %vec.ind.next = add <2 x i64> %vec.ind, <i64 2, i64 2>
+  %index.cmp = icmp eq i64 %index.next, 16
+  br i1 %index.cmp, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:
+  ret void
+}
+
+
+; Negative tests
+
+define i1 @extract_icmp_v4i32_splat_rhs(<4 x i32> %a, i32 %b) {
+; CHECK-LABEL: extract_icmp_v4i32_splat_rhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dup v1.4s, w0
+; CHECK-NEXT:    cmhi v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    umov w8, v0.h[1]
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ret
+  %ins = insertelement <4 x i32> poison, i32 %b, i32 0
+  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
+  %icmp = icmp ult <4 x i32> %a, %splat
+  %ext = extractelement <4 x i1> %icmp, i32 1
+  ret i1 %ext
+}
+
+define i1 @extract_icmp_v4i32_splat_rhs_mul_use(<4 x i32> %a, ptr %p) {
+; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_mul_use:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #235
+; CHECK-NEXT:    adrp x9, .LCPI7_0
+; CHECK-NEXT:    mov x8, x0
+; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI7_0]
+; CHECK-NEXT:    cmhi v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    xtn v1.4h, v0.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    umov w9, v1.h[1]
+; CHECK-NEXT:    fmov w10, s0
+; CHECK-NEXT:    and w0, w9, #0x1
+; CHECK-NEXT:    strb w10, [x8]
+; CHECK-NEXT:    ret
+  %icmp = icmp ult <4 x i32> %a, splat(i32 235)
+  %ext = extractelement <4 x i1> %icmp, i32 1
+  store <4 x i1> %icmp, ptr %p, align 4
+  ret i1 %ext
+}
+
+define i1 @extract_icmp_v4i32_splat_rhs_unknown_idx(<4 x i32> %a, i32 %c) {
+; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_unknown_idx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movi v1.4s, #127
+; CHECK-NEXT:    add x8, sp, #8
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    bfi x8, x0, #1, #2
+; CHECK-NEXT:    cmhi v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    str d0, [sp, #8]
+; CHECK-NEXT:    ldrh w8, [x8]
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
+  %icmp = icmp ult <4 x i32> %a, splat(i32 127)
+  %ext = extractelement <4 x i1> %icmp, i32 %c
+  ret i1 %ext
+}
+
diff --git a/llvm/test/CodeGen/X86/vselect.ll b/llvm/test/CodeGen/X86/vselect.ll
index 9acd995d612c31..be6ee8f6899584 100644
--- a/llvm/test/CodeGen/X86/vselect.ll
+++ b/llvm/test/CodeGen/X86/vselect.ll
@@ -796,3 +796,29 @@ define i64 @vselect_any_extend_vector_inreg_crash(ptr %x) {
   ret i64 %4
 }
 
+; Tests the scalarizeBinOp code in DAGCombiner
+define void @scalarize_binop(<1 x i1> %a) {
+; SSE-LABEL: scalarize_binop:
+; SSE:       # %bb.0: # %bb0
+; SSE-NEXT:    .p2align 4
+; SSE-NEXT:  .LBB35_1: # %bb1
+; SSE-NEXT:    # =>This Inner Loop Header: Depth=1
+; SSE-NEXT:    jmp .LBB35_1
+;
+; AVX-LABEL: scalarize_binop:
+; AVX:       # %bb.0: # %bb0
+; AVX-NEXT:    .p2align 4
+; AVX-NEXT:  .LBB35_1: # %bb1
+; AVX-NEXT:    # =>This Inner Loop Header: Depth=1
+; AVX-NEXT:    jmp .LBB35_1
+bb0:
+  br label %bb1
+
+bb1:
+  %b = select <1 x i1> %a, <1 x i1> zeroinitializer, <1 x i1> splat (i1 true)
+  br label %bb2
+
+bb2:
+  %c = extractelement <1 x i1> %b, i32 0
+  br label %bb1
+}