diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8cc920c16552e..45fde0ce90aa4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -26975,7 +26975,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
 /// If a vector binop is performed on splat values, it may be profitable to
 /// extract, scalarize, and insert/splat.
 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
-                                      const SDLoc &DL) {
+                                      const SDLoc &DL, bool LegalTypes) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   unsigned Opcode = N->getOpcode();
@@ -26997,7 +26997,12 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
       Src0.getValueType().getVectorElementType() != EltVT ||
       Src1.getValueType().getVectorElementType() != EltVT ||
       !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
-      !TLI.isOperationLegalOrCustom(Opcode, EltVT))
+      // If before type legalization, allow scalar types that will eventually be
+      // made legal.
+      !TLI.isOperationLegalOrCustom(
+          Opcode, LegalTypes
+                      ? EltVT
+                      : TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)))
     return SDValue();
 
   SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
@@ -27163,7 +27168,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
     }
   }
 
-  if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
+  if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL, LegalTypes))
     return V;
 
   return SDValue();
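For context, scalarizeBinOpOfSplats rewrites a binop of two splats into one scalar binop followed by a single splat; the change above lets it fire before type legalization by querying legality on the type the scalar will become. A hypothetical before/after sketch in LLVM IR (illustrative only, not taken from this patch; the combine itself runs on the SelectionDAG):

    ; Hypothetical input: a vector add of two splatted scalars.
    %hx = insertelement <vscale x 4 x i16> poison, i16 %x, i32 0
    %sx = shufflevector <vscale x 4 x i16> %hx, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
    %hy = insertelement <vscale x 4 x i16> poison, i16 %y, i32 0
    %sy = shufflevector <vscale x 4 x i16> %hy, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
    %v = add <vscale x 4 x i16> %sx, %sy

    ; After the combine (conceptually): one scalar add, then a single splat.
    %s = add i16 %x, %y
    %h = insertelement <vscale x 4 x i16> poison, i16 %s, i32 0
    %v.2 = shufflevector <vscale x 4 x i16> %h, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer

Scalarizing is profitable because it replaces a vector op and a second splat with one scalar op that can also fold into surrounding scalar code, as the test updates below show.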
diff --git a/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll b/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll
index 764f148ecd3aa..5a5dee0b53d43 100644
--- a/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll
+++ b/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll
@@ -16,14 +16,13 @@ define fastcc i8 @allocno_reload_assign() {
 ; CHECK-NEXT: uzp1 p0.h, p0.h, p0.h
 ; CHECK-NEXT: uzp1 p0.b, p0.b, p0.b
 ; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
-; CHECK-NEXT: ptrue p0.b
 ; CHECK-NEXT: fmov w8, s0
 ; CHECK-NEXT: mov z0.b, #0 // =0x0
-; CHECK-NEXT: sbfx x8, x8, #0, #1
 ; CHECK-NEXT: uunpklo z1.h, z0.b
 ; CHECK-NEXT: uunpkhi z0.h, z0.b
-; CHECK-NEXT: whilelo p1.b, xzr, x8
-; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: mvn w8, w8
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.b, xzr, x8
 ; CHECK-NEXT: uunpklo z2.s, z1.h
 ; CHECK-NEXT: uunpkhi z3.s, z1.h
 ; CHECK-NEXT: uunpklo z5.s, z0.h
@@ -31,15 +30,15 @@ define fastcc i8 @allocno_reload_assign() {
 ; CHECK-NEXT: punpklo p1.h, p0.b
 ; CHECK-NEXT: punpkhi p0.h, p0.b
 ; CHECK-NEXT: punpklo p2.h, p1.b
+; CHECK-NEXT: punpkhi p3.h, p1.b
 ; CHECK-NEXT: uunpklo z0.d, z2.s
 ; CHECK-NEXT: uunpkhi z1.d, z2.s
-; CHECK-NEXT: punpkhi p3.h, p1.b
+; CHECK-NEXT: punpklo p5.h, p0.b
 ; CHECK-NEXT: uunpklo z2.d, z3.s
 ; CHECK-NEXT: uunpkhi z3.d, z3.s
-; CHECK-NEXT: punpklo p5.h, p0.b
+; CHECK-NEXT: punpkhi p7.h, p0.b
 ; CHECK-NEXT: uunpklo z4.d, z5.s
 ; CHECK-NEXT: uunpkhi z5.d, z5.s
-; CHECK-NEXT: punpkhi p7.h, p0.b
 ; CHECK-NEXT: uunpklo z6.d, z7.s
 ; CHECK-NEXT: uunpkhi z7.d, z7.s
 ; CHECK-NEXT: punpklo p0.h, p2.b
diff --git a/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll b/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll
index 6875925adad83..f26e57b5a0b73 100644
--- a/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll
@@ -5,14 +5,11 @@
 define <vscale x 1 x i1> @nxv1i1(i1 %x, i1 %y) {
 ; CHECK-LABEL: nxv1i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
 ; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vmsne.vi v8, v8, 0
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vmsne.vi v9, v9, 0
-; CHECK-NEXT: vmxor.mm v0, v8, v9
+; CHECK-NEXT: vmsne.vi v0, v8, 0
 ; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 1 x i1> poison, i1 %x, i32 0
   %splat.x = shufflevector <vscale x 1 x i1> %head.x, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
@@ -25,14 +22,11 @@ define <vscale x 1 x i1> @nxv1i1(i1 %x, i1 %y) {
 define <vscale x 2 x i1> @nxv2i1(i1 %x, i1 %y) {
 ; CHECK-LABEL: nxv2i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
 ; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vmsne.vi v8, v8, 0
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vmsne.vi v9, v9, 0
-; CHECK-NEXT: vmxor.mm v0, v8, v9
+; CHECK-NEXT: vmsne.vi v0, v8, 0
 ; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 2 x i1> poison, i1 %x, i32 0
   %splat.x = shufflevector <vscale x 2 x i1> %head.x, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -45,14 +39,11 @@ define <vscale x 2 x i1> @nxv2i1(i1 %x, i1 %y) {
 define <vscale x 4 x i1> @nxv4i1(i1 %x, i1 %y) {
 ; CHECK-LABEL: nxv4i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
 ; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vmsne.vi v8, v8, 0
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vmsne.vi v9, v9, 0
-; CHECK-NEXT: vmxor.mm v0, v8, v9
+; CHECK-NEXT: vmsne.vi v0, v8, 0
 ; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 4 x i1> poison, i1 %x, i32 0
   %splat.x = shufflevector <vscale x 4 x i1> %head.x, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
@@ -65,14 +56,11 @@ define <vscale x 4 x i1> @nxv4i1(i1 %x, i1 %y) {
 define <vscale x 8 x i1> @nxv8i1(i1 %x, i1 %y) {
 ; CHECK-LABEL: nxv8i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
 ; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vmsne.vi v8, v8, 0
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vmsne.vi v9, v9, 0
-; CHECK-NEXT: vmxor.mm v0, v8, v9
+; CHECK-NEXT: vmsne.vi v0, v8, 0
 ; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 8 x i1> poison, i1 %x, i32 0
   %splat.x = shufflevector <vscale x 8 x i1> %head.x, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
@@ -85,14 +73,11 @@ define <vscale x 8 x i1> @nxv8i1(i1 %x, i1 %y) {
 define <vscale x 16 x i1> @nxv16i1(i1 %x, i1 %y) {
 ; CHECK-LABEL: nxv16i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
 ; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vmsne.vi v10, v8, 0
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.v.x v8, a1
-; CHECK-NEXT: vmsne.vi v11, v8, 0
-; CHECK-NEXT: vmxor.mm v0, v10, v11
+; CHECK-NEXT: vmsne.vi v0, v8, 0
 ; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 16 x i1> poison, i1 %x, i32 0
   %splat.x = shufflevector <vscale x 16 x i1> %head.x, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
@@ -105,14 +90,11 @@ define <vscale x 16 x i1> @nxv16i1(i1 %x, i1 %y) {
 define <vscale x 32 x i1> @nxv32i1(i1 %x, i1 %y) {
 ; CHECK-LABEL: nxv32i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
 ; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vmsne.vi v12, v8, 0
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.v.x v8, a1
-; CHECK-NEXT: vmsne.vi v13, v8, 0
-; CHECK-NEXT: vmxor.mm v0, v12, v13
+; CHECK-NEXT: vmsne.vi v0, v8, 0
 ; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 32 x i1> poison, i1 %x, i32 0
   %splat.x = shufflevector <vscale x 32 x i1> %head.x, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
@@ -125,14 +107,11 @@ define <vscale x 32 x i1> @nxv32i1(i1 %x, i1 %y) {
 define <vscale x 64 x i1> @nxv64i1(i1 %x, i1 %y) {
 ; CHECK-LABEL: nxv64i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
 ; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vmsne.vi v16, v8, 0
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.v.x v8, a1
-; CHECK-NEXT: vmsne.vi v17, v8, 0
-; CHECK-NEXT: vmxor.mm v0, v16, v17
+; CHECK-NEXT: vmsne.vi v0, v8, 0
 ; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 64 x i1> poison, i1 %x, i32 0
   %splat.x = shufflevector <vscale x 64 x i1> %head.x, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
@@ -145,9 +124,9 @@ define <vscale x 64 x i1> @nxv64i1(i1 %x, i1 %y) {
 define <vscale x 1 x i8> @nxv1i8(i8 %x, i8 %y) {
 ; CHECK-LABEL: nxv1i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
 ; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 1 x i8> poison, i8 %x, i32 0
   %splat.x = shufflevector <vscale x 1 x i8> %head.x, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
@@ -160,9 +139,9 @@ define <vscale x 1 x i8> @nxv1i8(i8 %x, i8 %y) {
 define <vscale x 2 x i8> @nxv2i8(i8 %x, i8 %y) {
 ; CHECK-LABEL: nxv2i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
 ; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 2 x i8> poison, i8 %x, i32 0
   %splat.x = shufflevector <vscale x 2 x i8> %head.x, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -175,9 +154,9 @@ define <vscale x 2 x i8> @nxv2i8(i8 %x, i8 %y) {
 define <vscale x 4 x i8> @nxv4i8(i8 %x, i8 %y) {
 ; CHECK-LABEL: nxv4i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
 ; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 4 x i8> poison, i8 %x, i32 0
   %splat.x = shufflevector <vscale x 4 x i8> %head.x, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -190,9 +169,9 @@ define <vscale x 4 x i8> @nxv4i8(i8 %x, i8 %y) {
 define <vscale x 8 x i8> @nxv8i8(i8 %x, i8 %y) {
 ; CHECK-LABEL: nxv8i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
 ; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 8 x i8> poison, i8 %x, i32 0
   %splat.x = shufflevector <vscale x 8 x i8> %head.x, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -205,9 +184,9 @@ define <vscale x 8 x i8> @nxv8i8(i8 %x, i8 %y) {
 define <vscale x 16 x i8> @nxv16i8(i8 %x, i8 %y) {
 ; CHECK-LABEL: nxv16i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
 ; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 16 x i8> poison, i8 %x, i32 0
   %splat.x = shufflevector <vscale x 16 x i8> %head.x, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
@@ -220,9 +199,9 @@ define <vscale x 16 x i8> @nxv16i8(i8 %x, i8 %y) {
 define <vscale x 32 x i8> @nxv32i8(i8 %x, i8 %y) {
 ; CHECK-LABEL: nxv32i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
 ; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 32 x i8> poison, i8 %x, i32 0
   %splat.x = shufflevector <vscale x 32 x i8> %head.x, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
@@ -235,9 +214,9 @@ define <vscale x 32 x i8> @nxv32i8(i8 %x, i8 %y) {
 define <vscale x 64 x i8> @nxv64i8(i8 %x, i8 %y) {
 ; CHECK-LABEL: nxv64i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
 ; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 64 x i8> poison, i8 %x, i32 0
   %splat.x = shufflevector <vscale x 64 x i8> %head.x, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
@@ -250,9 +229,9 @@ define <vscale x 64 x i8> @nxv64i8(i8 %x, i8 %y) {
 define <vscale x 1 x i16> @nxv1i16(i16 %x, i16 %y) {
 ; CHECK-LABEL: nxv1i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
 ; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 1 x i16> poison, i16 %x, i32 0
   %splat.x = shufflevector <vscale x 1 x i16> %head.x, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
@@ -265,9 +244,9 @@ define <vscale x 1 x i16> @nxv1i16(i16 %x, i16 %y) {
 define <vscale x 2 x i16> @nxv2i16(i16 %x, i16 %y) {
 ; CHECK-LABEL: nxv2i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
 ; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 2 x i16> poison, i16 %x, i32 0
   %splat.x = shufflevector <vscale x 2 x i16> %head.x, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -280,9 +259,9 @@ define <vscale x 2 x i16> @nxv2i16(i16 %x, i16 %y) {
 define <vscale x 4 x i16> @nxv4i16(i16 %x, i16 %y) {
 ; CHECK-LABEL: nxv4i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
 ; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 4 x i16> poison, i16 %x, i32 0
   %splat.x = shufflevector <vscale x 4 x i16> %head.x, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -295,9 +274,9 @@ define <vscale x 4 x i16> @nxv4i16(i16 %x, i16 %y) {
 define <vscale x 8 x i16> @nxv8i16(i16 %x, i16 %y) {
 ; CHECK-LABEL: nxv8i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
 ; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 8 x i16> poison, i16 %x, i32 0
   %splat.x = shufflevector <vscale x 8 x i16> %head.x, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -310,9 +289,9 @@ define <vscale x 8 x i16> @nxv8i16(i16 %x, i16 %y) {
 define <vscale x 16 x i16> @nxv16i16(i16 %x, i16 %y) {
 ; CHECK-LABEL: nxv16i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
 ; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 16 x i16> poison, i16 %x, i32 0
   %splat.x = shufflevector <vscale x 16 x i16> %head.x, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
@@ -325,9 +304,9 @@ define <vscale x 16 x i16> @nxv16i16(i16 %x, i16 %y) {
 define <vscale x 32 x i16> @nxv32i16(i16 %x, i16 %y) {
 ; CHECK-LABEL: nxv32i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
 ; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 32 x i16> poison, i16 %x, i32 0
   %splat.x = shufflevector <vscale x 32 x i16> %head.x, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
@@ -338,19 +317,12 @@ define <vscale x 32 x i16> @nxv32i16(i16 %x, i16 %y) {
 }
 
 define <vscale x 1 x i32> @nxv1i32(i32 %x, i32 %y) {
-; RV32-LABEL: nxv1i32:
-; RV32: # %bb.0:
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: nxv1i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vadd.vx v8, v8, a1
-; RV64-NEXT: ret
+; CHECK-LABEL: nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 1 x i32> poison, i32 %x, i32 0
   %splat.x = shufflevector <vscale x 1 x i32> %head.x, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
   %head.y = insertelement <vscale x 1 x i32> poison, i32 %y, i32 0
@@ -360,19 +332,12 @@ define <vscale x 1 x i32> @nxv1i32(i32 %x, i32 %y) {
 }
 
 define <vscale x 2 x i32> @nxv2i32(i32 %x, i32 %y) {
-; RV32-LABEL: nxv2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: nxv2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vadd.vx v8, v8, a1
-; RV64-NEXT: ret
+; CHECK-LABEL: nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 2 x i32> poison, i32 %x, i32 0
   %splat.x = shufflevector <vscale x 2 x i32> %head.x, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
   %head.y = insertelement <vscale x 2 x i32> poison, i32 %y, i32 0
@@ -382,19 +347,12 @@ define <vscale x 2 x i32> @nxv2i32(i32 %x, i32 %y) {
 }
 
 define <vscale x 4 x i32> @nxv4i32(i32 %x, i32 %y) {
-; RV32-LABEL: nxv4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: nxv4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vadd.vx v8, v8, a1
-; RV64-NEXT: ret
+; CHECK-LABEL: nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
   %splat.x = shufflevector <vscale x 4 x i32> %head.x, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
   %head.y = insertelement <vscale x 4 x i32> poison, i32 %y, i32 0
@@ -404,19 +362,12 @@ define <vscale x 4 x i32> @nxv4i32(i32 %x, i32 %y) {
 }
 
 define <vscale x 8 x i32> @nxv8i32(i32 %x, i32 %y) {
-; RV32-LABEL: nxv8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: nxv8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vadd.vx v8, v8, a1
-; RV64-NEXT: ret
+; CHECK-LABEL: nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 8 x i32> poison, i32 %x, i32 0
   %splat.x = shufflevector <vscale x 8 x i32> %head.x, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
   %head.y = insertelement <vscale x 8 x i32> poison, i32 %y, i32 0
@@ -426,19 +377,12 @@ define <vscale x 8 x i32> @nxv8i32(i32 %x, i32 %y) {
 }
 
 define <vscale x 16 x i32> @nxv16i32(i32 %x, i32 %y) {
-; RV32-LABEL: nxv16i32:
-; RV32: # %bb.0:
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: nxv16i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e32, m8, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vadd.vx v8, v8, a1
-; RV64-NEXT: ret
+; CHECK-LABEL: nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
   %head.x = insertelement <vscale x 16 x i32> poison, i32 %x, i32 0
   %splat.x = shufflevector <vscale x 16 x i32> %head.x, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
   %head.y = insertelement <vscale x 16 x i32> poison, i32 %y, i32 0
@@ -452,16 +396,15 @@ define <vscale x 1 x i64> @nxv1i64(i64 %x, i64 %y) {
 ; RV32: # %bb.0:
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: sltu a0, a2, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a0, 12(sp)
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
 ; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vadd.vv v8, v8, v9
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
@@ -484,16 +427,15 @@ define <vscale x 2 x i64> @nxv2i64(i64 %x, i64 %y) {
 ; RV32: # %bb.0:
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: sltu a0, a2, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a0, 12(sp)
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
 ; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vadd.vv v8, v8, v10
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
@@ -516,16 +458,15 @@ define <vscale x 4 x i64> @nxv4i64(i64 %x, i64 %y) {
 ; RV32: # %bb.0:
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: sltu a0, a2, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a0, 12(sp)
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
 ; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v12, (a0), zero
-; RV32-NEXT: vadd.vv v8, v8, v12
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
@@ -548,16 +489,15 @@ define <vscale x 8 x i64> @nxv8i64(i64 %x, i64 %y) {
 ; RV32: # %bb.0:
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: sltu a0, a2, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a0, 12(sp)
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
 ; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vadd.vv v8, v8, v16
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
@@ -619,3 +559,50 @@ define <vscale x 2 x double> @nxv2f64(double %x, double %y) {
   %v = fadd <vscale x 2 x double> %splat.x, %splat.y
   ret <vscale x 2 x double> %v
 }
+
+define <vscale x 4 x i8> @uaddsatnxv4i8(i8 %x, i8 %y) {
+; CHECK-LABEL: uaddsatnxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vsaddu.vx v8, v8, a1
+; CHECK-NEXT: ret
+  %head.x = insertelement <vscale x 4 x i8> poison, i8 %x, i32 0
+  %splat.x = shufflevector <vscale x 4 x i8> %head.x, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %head.y = insertelement <vscale x 4 x i8> poison, i8 %y, i32 0
+  %splat.y = shufflevector <vscale x 4 x i8> %head.y, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.uadd.sat.nxv4i8(<vscale x 4 x i8> %splat.x, <vscale x 4 x i8> %splat.y)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 1 x i64> @uaddsatnxv1i64(i64 %x, i64 %y) {
+; RV32-LABEL: uaddsatnxv1i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: sw a3, 4(sp)
+; RV32-NEXT: sw a2, 0(sp)
+; RV32-NEXT: mv a0, sp
+; RV32-NEXT: vlse64.v v9, (a0), zero
+; RV32-NEXT: vsaddu.vv v8, v8, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: uaddsatnxv1i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v8, a0
+; RV64-NEXT: vsaddu.vx v8, v8, a1
+; RV64-NEXT: ret
+  %head.x = insertelement <vscale x 1 x i64> poison, i64 %x, i32 0
+  %splat.x = shufflevector <vscale x 1 x i64> %head.x, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %head.y = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+  %splat.y = shufflevector <vscale x 1 x i64> %head.y, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.uadd.sat.nxv1i64(<vscale x 1 x i64> %splat.x, <vscale x 1 x i64> %splat.y)
+  ret <vscale x 1 x i64> %v
+}
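The uaddsatnxv4i8 test above pins down the new guard: on riscv64, i8 promotes to i64 and ISD::ADD is legal there, so the plain adds scalarize, but ISD::UADDSAT is not legal or custom on scalar i64, so the saturating add stays in vector form (vsaddu). A minimal C++ sketch of that query, reusing names from the patch (an assumption-laden paraphrase, not a verbatim excerpt):

    // Sketch only: before type legalization, query legality on the type the
    // illegal scalar element will be transformed into.
    EVT QueryVT = LegalTypes
                      ? EltVT
                      : TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
    // e.g. on riscv64: EltVT = i8 gives QueryVT = i64; ISD::ADD is legal
    // there, so splat adds scalarize, while ISD::UADDSAT is not, so it stays
    // a vector op.
    if (!TLI.isOperationLegalOrCustom(Opcode, QueryVT))
      return SDValue();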
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll
index ee8c322961c7b..8f40b02423094 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll
@@ -461,16 +461,15 @@ define <1 x i64> @v1i64(i64 %x, i64 %y) {
 ; RV32: # %bb.0:
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: sltu a0, a2, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a0, 12(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
 ; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vadd.vv v8, v8, v9
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
@@ -493,17 +492,15 @@ define <2 x i64> @v2i64(i64 %x, i64 %y) {
 ; RV32: # %bb.0:
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: sltu a0, a2, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a0, 12(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
 ; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vadd.vv v9, v8, v9
-; RV32-NEXT: vrgather.vi v8, v9, 0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
@@ -527,17 +524,15 @@ define <4 x i64> @v4i64(i64 %x, i64 %y) {
 ; RV32: # %bb.0:
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: sltu a0, a2, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a0, 12(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
 ; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vadd.vv v10, v8, v10
-; RV32-NEXT: vrgather.vi v8, v10, 0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
@@ -561,17 +556,15 @@ define <8 x i64> @v8i64(i64 %x, i64 %y) {
 ; RV32: # %bb.0:
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: sltu a0, a2, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a0, 12(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
 ; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v12, (a0), zero
-; RV32-NEXT: vadd.vv v12, v8, v12
-; RV32-NEXT: vrgather.vi v8, v12, 0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
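In the two files above, the scalarized i64 add on RV32 is expanded into a 32-bit register pair with an explicit carry, which is the add/sltu/add sequence the updated checks expect (vsub-sdnode.ll below uses the mirrored sub/sltu/sub borrow form). A minimal C sketch, assuming lo/hi half names of my own choosing:

    #include <stdint.h>

    /* 64-bit add from 32-bit halves with an explicit carry, matching the
       add/sltu/add sequences in the checks above. */
    static uint64_t add64(uint32_t x_lo, uint32_t x_hi,
                          uint32_t y_lo, uint32_t y_hi) {
      uint32_t lo = x_lo + y_lo;         /* add  a2, a0, a2 */
      uint32_t carry = lo < x_lo;        /* sltu a0, a2, a0 */
      uint32_t hi = x_hi + y_hi + carry; /* add a1, a1, a3; add a0, a1, a0 */
      return ((uint64_t)hi << 32) | lo;
    }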
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
index 27fceb0112ae3..2b141097366cf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
@@ -773,16 +773,15 @@ define <vscale x 8 x i64> @vadd_xx_nxv8i64(i64 %a, i64 %b) nounwind {
 ; RV32-LABEL: vadd_xx_nxv8i64:
 ; RV32: # %bb.0:
 ; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: sltu a0, a2, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a0, 12(sp)
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
 ; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vadd.vv v8, v8, v16
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
index 40d0d9aa9d1d6..a84e2c984f669 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
@@ -1224,16 +1224,13 @@ define <vscale x 8 x i64> @vand_xx_nxv8i64(i64 %a, i64 %b) nounwind {
 ; RV32-LABEL: vand_xx_nxv8i64:
 ; RV32: # %bb.0:
 ; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: and a1, a1, a3
 ; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: and a0, a0, a2
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
 ; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vand.vv v8, v8, v16
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
index 1a6d5a1d0029d..0b8620c90c62e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32NOM
 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64NOM
-; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32M
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64M
 
 define <vscale x 1 x i8> @vmul_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
@@ -864,21 +864,21 @@ define <vscale x 8 x i64> @vmul_vi_nxv8i64_2(<vscale x 8 x i64> %va) {
 }
 
 define <vscale x 8 x i64> @vmul_xx_nxv8i64(i64 %a, i64 %b) nounwind {
-; RV32-LABEL: vmul_xx_nxv8i64:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: ret
+; RV32NOM-LABEL: vmul_xx_nxv8i64:
+; RV32NOM: # %bb.0:
+; RV32NOM-NEXT: addi sp, sp, -16
+; RV32NOM-NEXT: sw a1, 12(sp)
+; RV32NOM-NEXT: sw a0, 8(sp)
+; RV32NOM-NEXT: addi a0, sp, 8
+; RV32NOM-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32NOM-NEXT: vlse64.v v8, (a0), zero
+; RV32NOM-NEXT: sw a3, 4(sp)
+; RV32NOM-NEXT: sw a2, 0(sp)
+; RV32NOM-NEXT: mv a0, sp
+; RV32NOM-NEXT: vlse64.v v16, (a0), zero
+; RV32NOM-NEXT: vmul.vv v8, v8, v16
+; RV32NOM-NEXT: addi sp, sp, 16
+; RV32NOM-NEXT: ret
 ;
 ; RV64NOM-LABEL: vmul_xx_nxv8i64:
 ; RV64NOM: # %bb.0:
@@ -887,6 +887,23 @@ define <vscale x 8 x i64> @vmul_xx_nxv8i64(i64 %a, i64 %b) nounwind {
 ; RV64NOM-NEXT: vmul.vx v8, v8, a1
 ; RV64NOM-NEXT: ret
 ;
+; RV32M-LABEL: vmul_xx_nxv8i64:
+; RV32M: # %bb.0:
+; RV32M-NEXT: addi sp, sp, -16
+; RV32M-NEXT: mul a4, a0, a2
+; RV32M-NEXT: sw a4, 8(sp)
+; RV32M-NEXT: mul a3, a0, a3
+; RV32M-NEXT: mulhu a0, a0, a2
+; RV32M-NEXT: add a0, a0, a3
+; RV32M-NEXT: mul a1, a1, a2
+; RV32M-NEXT: add a0, a0, a1
+; RV32M-NEXT: sw a0, 12(sp)
+; RV32M-NEXT: addi a0, sp, 8
+; RV32M-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32M-NEXT: vlse64.v v8, (a0), zero
+; RV32M-NEXT: addi sp, sp, 16
+; RV32M-NEXT: ret
+;
 ; RV64M-LABEL: vmul_xx_nxv8i64:
 ; RV64M: # %bb.0:
 ; RV64M-NEXT: mul a0, a0, a1
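With the M extension, the new RV32M block above computes the full 64-bit product in scalar registers before the single splat. A minimal C sketch of that decomposition (names are mine; the uint64_t in mulhu32 only models the mulhu instruction):

    #include <stdint.h>

    /* Models the mulhu instruction: upper 32 bits of a 32x32 product. */
    static uint32_t mulhu32(uint32_t a, uint32_t b) {
      return (uint32_t)(((uint64_t)a * b) >> 32);
    }

    /* 64x64->64 multiply from 32-bit halves, matching the RV32M sequence
       above: mul a4,a0,a2 / mulhu a0,a0,a2 / mul a3,a0,a3 / mul a1,a1,a2
       plus the two adds.  Cross terms a_hi*b_hi shift out of the result. */
    static uint64_t mul64(uint32_t a_lo, uint32_t a_hi,
                          uint32_t b_lo, uint32_t b_hi) {
      uint32_t lo = a_lo * b_lo;
      uint32_t hi = mulhu32(a_lo, b_lo) + a_lo * b_hi + a_hi * b_lo;
      return ((uint64_t)hi << 32) | lo;
    }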
diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
index fbbd71cb35445..dcfe07c1fba65 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
@@ -1015,16 +1015,13 @@ define <vscale x 8 x i64> @vor_xx_nxv8i64(i64 %a, i64 %b) nounwind {
 ; RV32-LABEL: vor_xx_nxv8i64:
 ; RV32: # %bb.0:
 ; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: or a1, a1, a3
 ; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: or a0, a0, a2
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
 ; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vor.vv v8, v8, v16
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
index b7f404c8e5ac9..c2173c9a291fc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
@@ -795,16 +795,15 @@ define <vscale x 8 x i64> @vsub_xx_nxv8i64(i64 %a, i64 %b) nounwind {
 ; RV32-LABEL: vsub_xx_nxv8i64:
 ; RV32: # %bb.0:
 ; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sub a4, a0, a2
+; RV32-NEXT: sw a4, 8(sp)
+; RV32-NEXT: sltu a0, a0, a2
+; RV32-NEXT: sub a1, a1, a3
+; RV32-NEXT: sub a1, a1, a0
 ; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
 ; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vsub.vv v8, v8, v16
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
index 3f10b10675ca7..b03a105610dfd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
@@ -1224,16 +1224,13 @@ define <vscale x 8 x i64> @vxor_xx_nxv8i64(i64 %a, i64 %b) nounwind {
 ; RV32-LABEL: vxor_xx_nxv8i64:
 ; RV32: # %bb.0:
 ; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: xor a1, a1, a3
 ; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: xor a0, a0, a2
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
 ; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: mv a0, sp
-; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vxor.vv v8, v8, v16
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll b/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll
index 2b08f1c23b59a..8b30473983d8c 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll
@@ -5,16 +5,11 @@
 target triple = "wasm32-unknown-unknown"
 
-;; TODO: Optimize this further by scalarizing the add
-
 ; CHECK-LABEL: shl_add:
 ; CHECK-NEXT: .functype shl_add (v128, i32, i32) -> (v128)
-; CHECK-NEXT: i8x16.splat $push1=, $1
-; CHECK-NEXT: i8x16.splat $push0=, $2
-; CHECK-NEXT: i8x16.add $push2=, $pop1, $pop0
-; CHECK-NEXT: i8x16.extract_lane_u $push3=, $pop2, 0
-; CHECK-NEXT: i8x16.shl $push4=, $0, $pop3
-; CHECK-NEXT: return $pop4
+; CHECK-NEXT: i32.add $push0=, $1, $2
+; CHECK-NEXT: i8x16.shl $push1=, $0, $pop0
+; CHECK-NEXT: return $pop1
 define <16 x i8> @shl_add(<16 x i8> %v, i8 %a, i8 %b) {
   %t1 = insertelement <16 x i8> undef, i8 %a, i32 0
   %va = shufflevector <16 x i8> %t1, <16 x i8> undef, <16 x i32> zeroinitializer
@@ -44,13 +39,13 @@ define <16 x i8> @shl_abs(<16 x i8> %v, i8 %a) {
 
 ; CHECK-LABEL: shl_abs_add:
 ; CHECK-NEXT: .functype shl_abs_add (v128, i32, i32) -> (v128)
-; CHECK-NEXT: i8x16.splat $push1=, $1
-; CHECK-NEXT: i8x16.splat $push0=, $2
-; CHECK-NEXT: i8x16.add $push2=, $pop1, $pop0
-; CHECK-NEXT: i8x16.abs $push3=, $pop2
-; CHECK-NEXT: i8x16.extract_lane_u $push4=, $pop3, 0
-; CHECK-NEXT: i8x16.shl $push5=, $0, $pop4
-; CHECK-NEXT: return $pop5
+; CHECK-NEXT: i32.add $push0=, $1, $2
+; CHECK-NEXT: i8x16.splat $push1=, $pop0
+; CHECK-NEXT: i8x16.abs $push2=, $pop1
+; CHECK-NEXT: i8x16.extract_lane_u $push3=, $pop2, 0
+; CHECK-NEXT: i8x16.shl $push4=, $0, $pop3
+; CHECK-NEXT: return $pop4
+
 define <16 x i8> @shl_abs_add(<16 x i8> %v, i8 %a, i8 %b) {
   %t1 = insertelement <16 x i8> undef, i8 %a, i32 0
   %va = shufflevector <16 x i8> %t1, <16 x i8> undef, <16 x i32> zeroinitializer