diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 7beaeb9b7a171..f413d5ac853b9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2783,10 +2783,16 @@ bool TargetLowering::SimplifyDemandedBits( unsigned DemandedBitsLZ = DemandedBits.countl_zero(); APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ); KnownBits KnownOp0, KnownOp1; - if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, KnownOp0, TLO, - Depth + 1) || - SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO, + auto GetDemandedBitsLHSMask = [&](APInt Demanded, + const KnownBits &KnownRHS) { + if (Op.getOpcode() == ISD::MUL) + Demanded.clearHighBits(KnownRHS.countMinTrailingZeros()); + return Demanded; + }; + if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO, Depth + 1) || + SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1), + DemandedElts, KnownOp0, TLO, Depth + 1) || // See if the operation should be performed at a smaller bit width. ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) { if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) { diff --git a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll index 736f66c935e74..40b8a47f92aa7 100644 --- a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll +++ b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll @@ -1709,289 +1709,289 @@ define i32 @test_sdot_v33i8_double(<33 x i8> %a, <33 x i8> %b, <33 x i8> %c, <33 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: fmov s4, w0 ; CHECK-NEXT: ldr b0, [sp, #80] ; CHECK-NEXT: add x8, sp, #88 -; CHECK-NEXT: ldr b2, [sp, #144] -; CHECK-NEXT: fmov s4, w0 +; CHECK-NEXT: ldr b1, [sp, #144] ; CHECK-NEXT: add x10, sp, #152 -; CHECK-NEXT: ldr b3, [sp, #16] +; CHECK-NEXT: ldr b6, [sp, #16] ; CHECK-NEXT: ld1 { v0.b }[1], [x8] -; CHECK-NEXT: ld1 { v2.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #24 -; CHECK-NEXT: ldr b1, [sp, #344] ; CHECK-NEXT: add x9, sp, #96 -; CHECK-NEXT: ld1 { v3.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #352 +; CHECK-NEXT: ldr b2, [sp, #344] ; CHECK-NEXT: mov v4.b[1], w1 +; CHECK-NEXT: ld1 { v1.b }[1], [x10] +; CHECK-NEXT: add x10, sp, #24 +; CHECK-NEXT: ld1 { v6.b }[1], [x10] +; CHECK-NEXT: add x10, sp, #352 ; CHECK-NEXT: add x8, sp, #104 ; CHECK-NEXT: ld1 { v0.b }[2], [x9] ; CHECK-NEXT: add x9, sp, #160 -; CHECK-NEXT: ld1 { v1.b }[1], [x10] -; CHECK-NEXT: ld1 { v2.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #32 -; CHECK-NEXT: add x12, sp, #360 -; CHECK-NEXT: ld1 { v3.b }[2], [x9] +; CHECK-NEXT: ld1 { v2.b }[1], [x10] +; CHECK-NEXT: ld1 { v1.b }[2], [x9] +; CHECK-NEXT: add x10, sp, #32 ; CHECK-NEXT: add x11, sp, #112 -; CHECK-NEXT: add x10, sp, #120 -; CHECK-NEXT: ld1 { v1.b }[2], [x12] -; CHECK-NEXT: add x12, sp, #168 -; CHECK-NEXT: ld1 { v0.b }[3], [x8] ; CHECK-NEXT: mov v4.b[2], w2 -; CHECK-NEXT: ld1 { v2.b }[3], [x12] -; CHECK-NEXT: add x12, sp, #40 -; CHECK-NEXT: ld1 { v3.b }[3], [x12] -; CHECK-NEXT: add x13, sp, #176 -; CHECK-NEXT: ldr b16, [sp, #216] -; CHECK-NEXT: ld1 { v0.b }[4], [x11] -; CHECK-NEXT: add x11, sp, #48 -; CHECK-NEXT: add x12, sp, #368 -; CHECK-NEXT: ld1 { v2.b }[4], [x13] +; CHECK-NEXT: ld1 { v6.b }[2], [x10] +; CHECK-NEXT: add x10, sp, #168 +; CHECK-NEXT: ld1 { v0.b }[3], [x8] +; CHECK-NEXT: ldr b5, [sp, #216] ; CHECK-NEXT: add x13, sp, #224 -; CHECK-NEXT: add x9, sp, #128 +; CHECK-NEXT: ld1 { v1.b }[3], [x10] +; CHECK-NEXT: add x10, sp, #40 +; CHECK-NEXT: add x12, sp, #120 +; CHECK-NEXT: ld1 { v6.b }[3], [x10] +; CHECK-NEXT: add x10, sp, #176 +; CHECK-NEXT: ld1 { v5.b }[1], [x13] ; CHECK-NEXT: mov v4.b[3], w3 -; CHECK-NEXT: ld1 { v3.b }[4], [x11] -; CHECK-NEXT: ld1 { v16.b }[1], [x13] -; CHECK-NEXT: ld1 { v0.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #56 -; CHECK-NEXT: ld1 { v1.b }[3], [x12] -; CHECK-NEXT: add x12, sp, #184 -; CHECK-NEXT: ldr b5, [sp, #280] -; CHECK-NEXT: add x11, sp, #376 -; CHECK-NEXT: ld1 { v3.b }[5], [x10] -; CHECK-NEXT: ld1 { v2.b }[5], [x12] -; CHECK-NEXT: add x10, sp, #232 +; CHECK-NEXT: ld1 { v0.b }[4], [x11] +; CHECK-NEXT: add x11, sp, #48 +; CHECK-NEXT: add x8, sp, #360 +; CHECK-NEXT: ld1 { v1.b }[4], [x10] +; CHECK-NEXT: add x13, sp, #56 +; CHECK-NEXT: ld1 { v6.b }[4], [x11] +; CHECK-NEXT: ldr b7, [sp, #280] +; CHECK-NEXT: ld1 { v2.b }[2], [x8] +; CHECK-NEXT: add x15, sp, #232 +; CHECK-NEXT: ld1 { v0.b }[5], [x12] +; CHECK-NEXT: add x14, sp, #184 ; CHECK-NEXT: mov v4.b[4], w4 +; CHECK-NEXT: ld1 { v5.b }[2], [x15] +; CHECK-NEXT: add x9, sp, #128 +; CHECK-NEXT: ld1 { v6.b }[5], [x13] +; CHECK-NEXT: add x13, sp, #288 +; CHECK-NEXT: add x10, sp, #368 +; CHECK-NEXT: ld1 { v7.b }[1], [x13] +; CHECK-NEXT: ld1 { v1.b }[5], [x14] +; CHECK-NEXT: ld1 { v2.b }[3], [x10] +; CHECK-NEXT: add x15, sp, #240 ; CHECK-NEXT: ld1 { v0.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #288 -; CHECK-NEXT: add x15, sp, #64 -; CHECK-NEXT: ld1 { v16.b }[2], [x10] -; CHECK-NEXT: ldr b17, [sp, #408] -; CHECK-NEXT: ld1 { v5.b }[1], [x9] -; CHECK-NEXT: add x14, sp, #192 -; CHECK-NEXT: ld1 { v1.b }[4], [x11] -; CHECK-NEXT: ld1 { v3.b }[6], [x15] -; CHECK-NEXT: add x15, sp, #416 -; CHECK-NEXT: ld1 { v2.b }[6], [x14] -; CHECK-NEXT: add x14, sp, #240 -; CHECK-NEXT: ld1 { v17.b }[1], [x15] ; CHECK-NEXT: add x9, sp, #296 -; CHECK-NEXT: add x8, sp, #136 ; CHECK-NEXT: mov v4.b[5], w5 -; CHECK-NEXT: add x13, sp, #384 -; CHECK-NEXT: ld1 { v16.b }[3], [x14] -; CHECK-NEXT: ld1 { v5.b }[2], [x9] -; CHECK-NEXT: ld1 { v1.b }[5], [x13] -; CHECK-NEXT: ld1 { v0.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #424 -; CHECK-NEXT: add x9, sp, #248 -; CHECK-NEXT: ld1 { v17.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #304 -; CHECK-NEXT: add x10, sp, #392 -; CHECK-NEXT: ld1 { v16.b }[4], [x9] -; CHECK-NEXT: ld1 { v5.b }[3], [x8] +; CHECK-NEXT: add x11, sp, #192 +; CHECK-NEXT: ld1 { v5.b }[3], [x15] +; CHECK-NEXT: ldr b3, [sp, #408] +; CHECK-NEXT: ld1 { v7.b }[2], [x9] +; CHECK-NEXT: add x12, sp, #64 +; CHECK-NEXT: add x13, sp, #376 +; CHECK-NEXT: ld1 { v1.b }[6], [x11] +; CHECK-NEXT: add x11, sp, #416 +; CHECK-NEXT: ld1 { v6.b }[6], [x12] +; CHECK-NEXT: add x12, sp, #248 +; CHECK-NEXT: ld1 { v3.b }[1], [x11] ; CHECK-NEXT: mov v4.b[6], w6 -; CHECK-NEXT: ld1 { v1.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #432 -; CHECK-NEXT: add x9, sp, #256 -; CHECK-NEXT: ld1 { v17.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #312 -; CHECK-NEXT: ldr b22, [sp, #608] -; CHECK-NEXT: add x8, sp, #400 -; CHECK-NEXT: ld1 { v16.b }[5], [x9] -; CHECK-NEXT: ld1 { v5.b }[4], [x10] -; CHECK-NEXT: add x9, sp, #616 -; CHECK-NEXT: ld1 { v1.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #440 -; CHECK-NEXT: ld1 { v22.b }[1], [x9] +; CHECK-NEXT: ld1 { v2.b }[4], [x13] +; CHECK-NEXT: add x11, sp, #304 +; CHECK-NEXT: ld1 { v5.b }[4], [x12] +; CHECK-NEXT: ld1 { v7.b }[3], [x11] +; CHECK-NEXT: add x8, sp, #136 +; CHECK-NEXT: add x15, sp, #384 +; CHECK-NEXT: add x9, sp, #424 +; CHECK-NEXT: ld1 { v0.b }[7], [x8] +; CHECK-NEXT: ld1 { v3.b }[2], [x9] +; CHECK-NEXT: ld1 { v2.b }[5], [x15] +; CHECK-NEXT: add x8, sp, #312 ; CHECK-NEXT: mov v4.b[7], w7 -; CHECK-NEXT: ld1 { v17.b }[4], [x8] +; CHECK-NEXT: add x9, sp, #256 +; CHECK-NEXT: add x10, sp, #200 +; CHECK-NEXT: ld1 { v7.b }[4], [x8] +; CHECK-NEXT: ld1 { v5.b }[5], [x9] +; CHECK-NEXT: add x14, sp, #72 +; CHECK-NEXT: ld1 { v1.b }[7], [x10] +; CHECK-NEXT: add x10, sp, #432 +; CHECK-NEXT: add x8, sp, #392 +; CHECK-NEXT: ld1 { v6.b }[7], [x14] +; CHECK-NEXT: ld1 { v3.b }[3], [x10] +; CHECK-NEXT: ld1 { v2.b }[6], [x8] ; CHECK-NEXT: add x8, sp, #320 +; CHECK-NEXT: add x9, sp, #264 +; CHECK-NEXT: sshll v21.8h, v4.8b, #0 +; CHECK-NEXT: ldr b4, [sp, #208] +; CHECK-NEXT: ld1 { v7.b }[5], [x8] +; CHECK-NEXT: ld1 { v5.b }[6], [x9] +; CHECK-NEXT: add x10, sp, #440 +; CHECK-NEXT: add x8, sp, #400 +; CHECK-NEXT: sshll v16.8h, v6.8b, #0 +; CHECK-NEXT: sshll v6.8h, v4.8b, #0 +; CHECK-NEXT: ld1 { v3.b }[4], [x10] +; CHECK-NEXT: ld1 { v2.b }[7], [x8] +; CHECK-NEXT: add x8, sp, #272 +; CHECK-NEXT: add x9, sp, #328 +; CHECK-NEXT: ldr b4, [sp, #608] +; CHECK-NEXT: ld1 { v7.b }[6], [x9] +; CHECK-NEXT: ld1 { v5.b }[7], [x8] +; CHECK-NEXT: add x8, sp, #616 ; CHECK-NEXT: add x10, sp, #448 -; CHECK-NEXT: ldr b6, [sp, #208] -; CHECK-NEXT: ld1 { v5.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #624 -; CHECK-NEXT: ldr b7, [sp, #472] -; CHECK-NEXT: ld1 { v22.b }[2], [x8] -; CHECK-NEXT: ld1 { v17.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #328 -; CHECK-NEXT: sshll v20.8h, v4.8b, #0 -; CHECK-NEXT: ldr b4, [sp, #480] +; CHECK-NEXT: ld1 { v4.b }[1], [x8] +; CHECK-NEXT: ldr b18, [sp, #480] +; CHECK-NEXT: ld1 { v3.b }[5], [x10] +; CHECK-NEXT: add x9, sp, #336 +; CHECK-NEXT: ldr b17, [sp, #472] +; CHECK-NEXT: add x8, sp, #488 +; CHECK-NEXT: ld1 { v7.b }[7], [x9] +; CHECK-NEXT: add x9, sp, #624 +; CHECK-NEXT: ld1 { v18.b }[1], [x8] +; CHECK-NEXT: sshll v22.8h, v5.8b, #0 ; CHECK-NEXT: add x8, sp, #456 -; CHECK-NEXT: ld1 { v5.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #632 -; CHECK-NEXT: sshll v6.8h, v6.8b, #0 -; CHECK-NEXT: ld1 { v22.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #488 -; CHECK-NEXT: ld1 { v17.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #336 -; CHECK-NEXT: ld1 { v4.b }[1], [x10] -; CHECK-NEXT: sshll v7.8h, v7.8b, #0 -; CHECK-NEXT: ld1 { v5.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #640 -; CHECK-NEXT: add x9, sp, #264 -; CHECK-NEXT: ld1 { v22.b }[4], [x8] +; CHECK-NEXT: sshll v5.8h, v17.8b, #0 +; CHECK-NEXT: ld1 { v4.b }[2], [x9] +; CHECK-NEXT: ld1 { v3.b }[6], [x8] ; CHECK-NEXT: add x8, sp, #496 -; CHECK-NEXT: ld1 { v16.b }[6], [x9] -; CHECK-NEXT: ld1 { v4.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #648 -; CHECK-NEXT: smull v18.4s, v6.4h, v7.4h -; CHECK-NEXT: ldr b7, [sp, #544] -; CHECK-NEXT: add x9, sp, #272 -; CHECK-NEXT: movi v6.2d, #0000000000000000 -; CHECK-NEXT: ld1 { v22.b }[5], [x8] +; CHECK-NEXT: sshll v17.8h, v7.8b, #0 +; CHECK-NEXT: add x10, sp, #632 +; CHECK-NEXT: ld1 { v18.b }[2], [x8] +; CHECK-NEXT: add x9, sp, #464 ; CHECK-NEXT: add x8, sp, #504 -; CHECK-NEXT: ld1 { v16.b }[7], [x9] -; CHECK-NEXT: ld1 { v4.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #552 -; CHECK-NEXT: add x9, sp, #656 -; CHECK-NEXT: ld1 { v7.b }[1], [x8] +; CHECK-NEXT: smull v19.4s, v6.4h, v5.4h +; CHECK-NEXT: movi v5.2d, #0000000000000000 +; CHECK-NEXT: ld1 { v4.b }[3], [x10] +; CHECK-NEXT: ld1 { v3.b }[7], [x9] +; CHECK-NEXT: smull v6.4s, v16.4h, v17.4h +; CHECK-NEXT: add x9, sp, #640 +; CHECK-NEXT: ld1 { v18.b }[3], [x8] +; CHECK-NEXT: smull2 v16.4s, v16.8h, v17.8h +; CHECK-NEXT: ldr b17, [sp, #672] +; CHECK-NEXT: ld1 { v4.b }[4], [x9] +; CHECK-NEXT: add x9, sp, #680 +; CHECK-NEXT: ldr b20, [sp, #544] +; CHECK-NEXT: mov v5.s[0], v19.s[0] ; CHECK-NEXT: add x8, sp, #512 -; CHECK-NEXT: ldr b21, [sp, #672] -; CHECK-NEXT: ld1 { v22.b }[6], [x9] -; CHECK-NEXT: mov v6.s[0], v18.s[0] -; CHECK-NEXT: add x9, sp, #664 -; CHECK-NEXT: ld1 { v4.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #560 -; CHECK-NEXT: sshll v23.8h, v16.8b, #0 -; CHECK-NEXT: ld1 { v7.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #520 -; CHECK-NEXT: movi v19.2d, #0000000000000000 -; CHECK-NEXT: ld1 { v22.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #528 -; CHECK-NEXT: add x10, sp, #464 -; CHECK-NEXT: ld1 { v4.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #568 -; CHECK-NEXT: smull2 v18.4s, v20.8h, v23.8h -; CHECK-NEXT: ld1 { v7.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #680 -; CHECK-NEXT: smlal v6.4s, v20.4h, v23.4h -; CHECK-NEXT: ld1 { v21.b }[1], [x8] -; CHECK-NEXT: sshll v20.8h, v22.8b, #0 -; CHECK-NEXT: ldr b22, [sp, #736] -; CHECK-NEXT: ld1 { v4.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #576 -; CHECK-NEXT: ldr b23, [sp, #1000] -; CHECK-NEXT: ld1 { v7.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #688 -; CHECK-NEXT: sshll v24.8h, v22.8b, #0 -; CHECK-NEXT: ld1 { v21.b }[2], [x9] +; CHECK-NEXT: ld1 { v17.b }[1], [x9] +; CHECK-NEXT: add x11, sp, #552 +; CHECK-NEXT: add x10, sp, #648 +; CHECK-NEXT: ld1 { v18.b }[4], [x8] +; CHECK-NEXT: ld1 { v20.b }[1], [x11] +; CHECK-NEXT: ld1 { v4.b }[5], [x10] +; CHECK-NEXT: add x10, sp, #688 +; CHECK-NEXT: add x9, sp, #520 +; CHECK-NEXT: ld1 { v17.b }[2], [x10] +; CHECK-NEXT: add x10, sp, #560 +; CHECK-NEXT: smull2 v7.4s, v21.8h, v22.8h +; CHECK-NEXT: ld1 { v18.b }[5], [x9] +; CHECK-NEXT: smlal v5.4s, v21.4h, v22.4h +; CHECK-NEXT: ld1 { v20.b }[2], [x10] +; CHECK-NEXT: ldr b21, [sp, #736] +; CHECK-NEXT: ldr b22, [sp, #1000] +; CHECK-NEXT: add x8, sp, #656 ; CHECK-NEXT: add x9, sp, #696 -; CHECK-NEXT: sshll v25.8h, v23.8b, #0 -; CHECK-NEXT: add x8, sp, #536 -; CHECK-NEXT: ldr b22, [sp, #872] -; CHECK-NEXT: ldr b23, [sp, #936] -; CHECK-NEXT: ld1 { v4.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #584 -; CHECK-NEXT: ld1 { v17.b }[7], [x10] -; CHECK-NEXT: ld1 { v21.b }[3], [x9] -; CHECK-NEXT: ld1 { v7.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #880 -; CHECK-NEXT: add x9, sp, #704 -; CHECK-NEXT: smull v25.4s, v24.4h, v25.4h -; CHECK-NEXT: ldr b24, [sp, #744] -; CHECK-NEXT: ld1 { v22.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #944 -; CHECK-NEXT: add x10, sp, #888 -; CHECK-NEXT: ld1 { v21.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #752 -; CHECK-NEXT: ld1 { v23.b }[1], [x8] -; CHECK-NEXT: ld1 { v24.b }[1], [x9] -; CHECK-NEXT: add x8, sp, #712 +; CHECK-NEXT: add x11, sp, #568 +; CHECK-NEXT: ld1 { v4.b }[6], [x8] +; CHECK-NEXT: add x8, sp, #528 +; CHECK-NEXT: ld1 { v17.b }[3], [x9] +; CHECK-NEXT: sshll v21.8h, v21.8b, #0 +; CHECK-NEXT: sshll v24.8h, v22.8b, #0 +; CHECK-NEXT: ld1 { v18.b }[6], [x8] +; CHECK-NEXT: ld1 { v20.b }[3], [x11] +; CHECK-NEXT: add x10, sp, #704 +; CHECK-NEXT: ldr b23, [sp, #808] +; CHECK-NEXT: movi v19.2d, #0000000000000000 +; CHECK-NEXT: add x9, sp, #536 +; CHECK-NEXT: ld1 { v17.b }[4], [x10] +; CHECK-NEXT: add x10, sp, #576 +; CHECK-NEXT: ldr b22, [sp, #744] +; CHECK-NEXT: add x11, sp, #816 +; CHECK-NEXT: smull v24.4s, v21.4h, v24.4h +; CHECK-NEXT: ld1 { v18.b }[7], [x9] +; CHECK-NEXT: ld1 { v20.b }[4], [x10] +; CHECK-NEXT: add x10, sp, #752 +; CHECK-NEXT: ld1 { v23.b }[1], [x11] +; CHECK-NEXT: add x9, sp, #712 +; CHECK-NEXT: ld1 { v22.b }[1], [x10] +; CHECK-NEXT: ld1 { v17.b }[5], [x9] +; CHECK-NEXT: add x9, sp, #584 +; CHECK-NEXT: add x10, sp, #824 +; CHECK-NEXT: sshll v21.8h, v18.8b, #0 +; CHECK-NEXT: ld1 { v20.b }[5], [x9] ; CHECK-NEXT: add x9, sp, #760 -; CHECK-NEXT: ld1 { v22.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #952 -; CHECK-NEXT: mov v19.s[0], v25.s[0] -; CHECK-NEXT: ldr b25, [sp, #808] +; CHECK-NEXT: ldr b18, [sp, #936] ; CHECK-NEXT: ld1 { v23.b }[2], [x10] -; CHECK-NEXT: ld1 { v21.b }[5], [x8] -; CHECK-NEXT: ld1 { v24.b }[2], [x9] -; CHECK-NEXT: add x8, sp, #816 -; CHECK-NEXT: add x9, sp, #896 -; CHECK-NEXT: ld1 { v25.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #960 -; CHECK-NEXT: ld1 { v22.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #768 -; CHECK-NEXT: ld1 { v23.b }[3], [x8] -; CHECK-NEXT: add x10, sp, #904 -; CHECK-NEXT: ld1 { v24.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #824 -; CHECK-NEXT: add x8, sp, #720 -; CHECK-NEXT: ld1 { v25.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #968 -; CHECK-NEXT: ld1 { v22.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #776 -; CHECK-NEXT: ld1 { v23.b }[4], [x9] -; CHECK-NEXT: ld1 { v21.b }[6], [x8] -; CHECK-NEXT: ld1 { v24.b }[4], [x10] -; CHECK-NEXT: add x8, sp, #832 -; CHECK-NEXT: add x9, sp, #912 -; CHECK-NEXT: ld1 { v25.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #976 -; CHECK-NEXT: ld1 { v22.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #784 -; CHECK-NEXT: ld1 { v23.b }[5], [x8] -; CHECK-NEXT: add x10, sp, #920 -; CHECK-NEXT: ld1 { v24.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #840 -; CHECK-NEXT: add x8, sp, #728 -; CHECK-NEXT: ld1 { v25.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #984 -; CHECK-NEXT: ld1 { v22.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #792 -; CHECK-NEXT: ld1 { v23.b }[6], [x9] -; CHECK-NEXT: ld1 { v21.b }[7], [x8] -; CHECK-NEXT: ld1 { v24.b }[6], [x10] -; CHECK-NEXT: add x8, sp, #848 -; CHECK-NEXT: add x9, sp, #928 -; CHECK-NEXT: ld1 { v25.b }[5], [x8] -; CHECK-NEXT: add x12, sp, #72 -; CHECK-NEXT: add x8, sp, #992 -; CHECK-NEXT: ld1 { v22.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #800 -; CHECK-NEXT: ld1 { v3.b }[7], [x12] -; CHECK-NEXT: ld1 { v23.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #592 -; CHECK-NEXT: ld1 { v24.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #856 -; CHECK-NEXT: ld1 { v7.b }[6], [x8] -; CHECK-NEXT: add x11, sp, #200 -; CHECK-NEXT: ld1 { v25.b }[6], [x9] -; CHECK-NEXT: sshll v3.8h, v3.8b, #0 -; CHECK-NEXT: sshll v5.8h, v5.8b, #0 -; CHECK-NEXT: sshll v4.8h, v4.8b, #0 -; CHECK-NEXT: sshll v21.8h, v21.8b, #0 +; CHECK-NEXT: mov v19.s[0], v24.s[0] +; CHECK-NEXT: ldr b24, [sp, #872] +; CHECK-NEXT: ld1 { v22.b }[2], [x9] +; CHECK-NEXT: add x9, sp, #944 +; CHECK-NEXT: add x11, sp, #880 +; CHECK-NEXT: add x10, sp, #768 +; CHECK-NEXT: ld1 { v18.b }[1], [x9] +; CHECK-NEXT: add x9, sp, #832 +; CHECK-NEXT: ld1 { v24.b }[1], [x11] +; CHECK-NEXT: ld1 { v23.b }[3], [x9] +; CHECK-NEXT: ld1 { v22.b }[3], [x10] +; CHECK-NEXT: add x10, sp, #952 +; CHECK-NEXT: add x12, sp, #888 +; CHECK-NEXT: add x9, sp, #592 +; CHECK-NEXT: add x11, sp, #776 +; CHECK-NEXT: ld1 { v18.b }[2], [x10] +; CHECK-NEXT: add x10, sp, #840 +; CHECK-NEXT: ld1 { v24.b }[2], [x12] +; CHECK-NEXT: ld1 { v23.b }[4], [x10] +; CHECK-NEXT: ld1 { v22.b }[4], [x11] +; CHECK-NEXT: ld1 { v20.b }[6], [x9] +; CHECK-NEXT: add x9, sp, #960 +; CHECK-NEXT: add x11, sp, #896 +; CHECK-NEXT: add x10, sp, #784 +; CHECK-NEXT: ld1 { v18.b }[3], [x9] +; CHECK-NEXT: add x9, sp, #848 +; CHECK-NEXT: ld1 { v24.b }[3], [x11] +; CHECK-NEXT: ld1 { v23.b }[5], [x9] +; CHECK-NEXT: ld1 { v22.b }[5], [x10] +; CHECK-NEXT: add x10, sp, #968 +; CHECK-NEXT: add x12, sp, #904 +; CHECK-NEXT: add x9, sp, #600 +; CHECK-NEXT: add x11, sp, #792 +; CHECK-NEXT: ld1 { v18.b }[4], [x10] +; CHECK-NEXT: add x10, sp, #856 +; CHECK-NEXT: ld1 { v24.b }[4], [x12] +; CHECK-NEXT: ld1 { v23.b }[6], [x10] +; CHECK-NEXT: ld1 { v22.b }[6], [x11] +; CHECK-NEXT: ld1 { v20.b }[7], [x9] +; CHECK-NEXT: add x9, sp, #976 +; CHECK-NEXT: add x11, sp, #912 +; CHECK-NEXT: add x10, sp, #800 +; CHECK-NEXT: ld1 { v18.b }[5], [x9] +; CHECK-NEXT: add x9, sp, #864 +; CHECK-NEXT: ld1 { v24.b }[5], [x11] +; CHECK-NEXT: ld1 { v23.b }[7], [x9] +; CHECK-NEXT: add x9, sp, #720 +; CHECK-NEXT: ld1 { v22.b }[7], [x10] +; CHECK-NEXT: add x10, sp, #984 +; CHECK-NEXT: ld1 { v17.b }[6], [x9] +; CHECK-NEXT: add x9, sp, #920 +; CHECK-NEXT: ld1 { v18.b }[6], [x10] +; CHECK-NEXT: ld1 { v24.b }[6], [x9] +; CHECK-NEXT: add x10, sp, #728 +; CHECK-NEXT: add x8, sp, #664 +; CHECK-NEXT: sshll v20.8h, v20.8b, #0 ; CHECK-NEXT: sshll v22.8h, v22.8b, #0 ; CHECK-NEXT: sshll v23.8h, v23.8b, #0 -; CHECK-NEXT: add x8, sp, #600 -; CHECK-NEXT: sshll v24.8h, v24.8b, #0 -; CHECK-NEXT: add x9, sp, #864 -; CHECK-NEXT: ld1 { v2.b }[7], [x11] -; CHECK-NEXT: ld1 { v7.b }[7], [x8] -; CHECK-NEXT: ld1 { v25.b }[7], [x9] -; CHECK-NEXT: smull v16.4s, v3.4h, v5.4h -; CHECK-NEXT: smull2 v3.4s, v3.8h, v5.8h -; CHECK-NEXT: smull v5.4s, v21.4h, v23.4h -; CHECK-NEXT: smull2 v21.4s, v21.8h, v23.8h -; CHECK-NEXT: smull2 v23.4s, v20.8h, v22.8h -; CHECK-NEXT: smlal v19.4s, v4.4h, v24.4h -; CHECK-NEXT: sshll v2.8h, v2.8b, #0 -; CHECK-NEXT: sshll v17.8h, v17.8b, #0 +; CHECK-NEXT: add x9, sp, #992 +; CHECK-NEXT: ld1 { v17.b }[7], [x10] +; CHECK-NEXT: add x10, sp, #928 +; CHECK-NEXT: ld1 { v18.b }[7], [x9] +; CHECK-NEXT: ld1 { v4.b }[7], [x8] +; CHECK-NEXT: ld1 { v24.b }[7], [x10] +; CHECK-NEXT: smlal v19.4s, v21.4h, v22.4h +; CHECK-NEXT: smull2 v21.4s, v21.8h, v22.8h +; CHECK-NEXT: smull v22.4s, v20.4h, v23.4h +; CHECK-NEXT: smull2 v20.4s, v20.8h, v23.8h ; CHECK-NEXT: sshll v0.8h, v0.8b, #0 ; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: sshll v7.8h, v7.8b, #0 -; CHECK-NEXT: sshll v25.8h, v25.8b, #0 -; CHECK-NEXT: smlal2 v3.4s, v2.8h, v17.8h -; CHECK-NEXT: smlal v16.4s, v2.4h, v17.4h -; CHECK-NEXT: smlal2 v23.4s, v4.8h, v24.8h -; CHECK-NEXT: smlal2 v18.4s, v0.8h, v1.8h -; CHECK-NEXT: smlal v6.4s, v0.4h, v1.4h -; CHECK-NEXT: smlal v19.4s, v20.4h, v22.4h -; CHECK-NEXT: smlal2 v21.4s, v7.8h, v25.8h -; CHECK-NEXT: smlal v5.4s, v7.4h, v25.4h -; CHECK-NEXT: add v0.4s, v18.4s, v3.4s -; CHECK-NEXT: add v1.4s, v6.4s, v16.4s -; CHECK-NEXT: add v2.4s, v23.4s, v21.4s -; CHECK-NEXT: add v3.4s, v19.4s, v5.4s +; CHECK-NEXT: sshll v3.8h, v3.8b, #0 +; CHECK-NEXT: sshll v2.8h, v2.8b, #0 +; CHECK-NEXT: sshll v17.8h, v17.8b, #0 +; CHECK-NEXT: sshll v18.8h, v18.8b, #0 +; CHECK-NEXT: sshll v4.8h, v4.8b, #0 +; CHECK-NEXT: sshll v23.8h, v24.8b, #0 +; CHECK-NEXT: smlal2 v16.4s, v1.8h, v3.8h +; CHECK-NEXT: smlal v6.4s, v1.4h, v3.4h +; CHECK-NEXT: smlal2 v7.4s, v0.8h, v2.8h +; CHECK-NEXT: smlal v5.4s, v0.4h, v2.4h +; CHECK-NEXT: smlal2 v20.4s, v17.8h, v18.8h +; CHECK-NEXT: smlal v22.4s, v17.4h, v18.4h +; CHECK-NEXT: smlal2 v21.4s, v4.8h, v23.8h +; CHECK-NEXT: smlal v19.4s, v4.4h, v23.4h +; CHECK-NEXT: add v0.4s, v7.4s, v16.4s +; CHECK-NEXT: add v1.4s, v5.4s, v6.4s +; CHECK-NEXT: add v2.4s, v21.4s, v20.4s +; CHECK-NEXT: add v3.4s, v19.4s, v22.4s ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add v1.4s, v3.4s, v2.4s ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s @@ -2050,10 +2050,10 @@ define i32 @test_sdot_v33i8_double_nomla(<33 x i8> %a, <33 x i8> %b, <33 x i8> % ; CHECK-NEXT: ld1 { v3.b }[2], [x10] ; CHECK-NEXT: ld1 { v5.b }[2], [x8] ; CHECK-NEXT: add x8, sp, #176 -; CHECK-NEXT: ldr b6, [sp, #544] +; CHECK-NEXT: ldr b6, [sp, #672] ; CHECK-NEXT: ld1 { v0.b }[4], [x12] -; CHECK-NEXT: add x14, sp, #552 -; CHECK-NEXT: ldr b7, [sp, #672] +; CHECK-NEXT: add x14, sp, #680 +; CHECK-NEXT: ldr b7, [sp, #544] ; CHECK-NEXT: ld1 { v2.b }[4], [x8] ; CHECK-NEXT: add x13, sp, #40 ; CHECK-NEXT: ld1 { v6.b }[1], [x14] @@ -2061,7 +2061,7 @@ define i32 @test_sdot_v33i8_double_nomla(<33 x i8> %a, <33 x i8> %b, <33 x i8> % ; CHECK-NEXT: add x11, sp, #128 ; CHECK-NEXT: ld1 { v3.b }[3], [x13] ; CHECK-NEXT: ld1 { v0.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #680 +; CHECK-NEXT: add x9, sp, #552 ; CHECK-NEXT: add x13, sp, #184 ; CHECK-NEXT: ld1 { v7.b }[1], [x9] ; CHECK-NEXT: ld1 { v2.b }[5], [x13] @@ -2070,26 +2070,26 @@ define i32 @test_sdot_v33i8_double_nomla(<33 x i8> %a, <33 x i8> %b, <33 x i8> % ; CHECK-NEXT: ld1 { v4.b }[2], [x13] ; CHECK-NEXT: add x10, sp, #136 ; CHECK-NEXT: ld1 { v0.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #560 +; CHECK-NEXT: add x11, sp, #688 ; CHECK-NEXT: ld1 { v5.b }[3], [x15] ; CHECK-NEXT: ld1 { v6.b }[2], [x11] -; CHECK-NEXT: add x11, sp, #688 +; CHECK-NEXT: add x11, sp, #560 ; CHECK-NEXT: mov v1.b[3], w3 ; CHECK-NEXT: ld1 { v7.b }[2], [x11] ; CHECK-NEXT: add x9, sp, #632 ; CHECK-NEXT: add x11, sp, #512 ; CHECK-NEXT: ld1 { v0.b }[7], [x10] ; CHECK-NEXT: ld1 { v4.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #568 -; CHECK-NEXT: add x10, sp, #696 +; CHECK-NEXT: add x9, sp, #696 +; CHECK-NEXT: add x10, sp, #568 ; CHECK-NEXT: ld1 { v6.b }[3], [x9] ; CHECK-NEXT: ld1 { v5.b }[4], [x11] ; CHECK-NEXT: ld1 { v7.b }[3], [x10] ; CHECK-NEXT: add x9, sp, #640 ; CHECK-NEXT: mov v1.b[4], w4 ; CHECK-NEXT: ld1 { v4.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #576 -; CHECK-NEXT: add x10, sp, #704 +; CHECK-NEXT: add x9, sp, #704 +; CHECK-NEXT: add x10, sp, #576 ; CHECK-NEXT: add x11, sp, #520 ; CHECK-NEXT: ld1 { v6.b }[4], [x9] ; CHECK-NEXT: ldr b18, [sp, #736] @@ -2101,8 +2101,8 @@ define i32 @test_sdot_v33i8_double_nomla(<33 x i8> %a, <33 x i8> %b, <33 x i8> % ; CHECK-NEXT: add x9, sp, #648 ; CHECK-NEXT: ld1 { v3.b }[4], [x8] ; CHECK-NEXT: add x10, sp, #528 -; CHECK-NEXT: add x11, sp, #584 -; CHECK-NEXT: add x12, sp, #712 +; CHECK-NEXT: add x11, sp, #712 +; CHECK-NEXT: add x12, sp, #584 ; CHECK-NEXT: sshll v18.8h, v18.8b, #0 ; CHECK-NEXT: mov v1.b[5], w5 ; CHECK-NEXT: ld1 { v6.b }[5], [x11] @@ -2114,8 +2114,8 @@ define i32 @test_sdot_v33i8_double_nomla(<33 x i8> %a, <33 x i8> %b, <33 x i8> % ; CHECK-NEXT: ld1 { v3.b }[5], [x14] ; CHECK-NEXT: add x9, sp, #656 ; CHECK-NEXT: add x10, sp, #536 -; CHECK-NEXT: add x11, sp, #592 -; CHECK-NEXT: add x12, sp, #720 +; CHECK-NEXT: add x11, sp, #720 +; CHECK-NEXT: add x12, sp, #592 ; CHECK-NEXT: sshll v18.4s, v18.4h, #0 ; CHECK-NEXT: ldr b16, [sp, #208] ; CHECK-NEXT: ld1 { v6.b }[6], [x11] @@ -2127,8 +2127,8 @@ define i32 @test_sdot_v33i8_double_nomla(<33 x i8> %a, <33 x i8> %b, <33 x i8> % ; CHECK-NEXT: sshll v16.8h, v16.8b, #0 ; CHECK-NEXT: ld1 { v3.b }[6], [x8] ; CHECK-NEXT: add x8, sp, #664 -; CHECK-NEXT: add x9, sp, #600 -; CHECK-NEXT: add x10, sp, #728 +; CHECK-NEXT: add x9, sp, #728 +; CHECK-NEXT: add x10, sp, #600 ; CHECK-NEXT: mov v17.s[0], v18.s[0] ; CHECK-NEXT: ld1 { v6.b }[7], [x9] ; CHECK-NEXT: ld1 { v7.b }[7], [x10] @@ -2151,7 +2151,7 @@ define i32 @test_sdot_v33i8_double_nomla(<33 x i8> %a, <33 x i8> %b, <33 x i8> % ; CHECK-NEXT: sshll v2.8h, v2.8b, #0 ; CHECK-NEXT: sshll v3.8h, v3.8b, #0 ; CHECK-NEXT: saddl2 v16.4s, v7.8h, v6.8h -; CHECK-NEXT: saddl2 v5.4s, v4.8h, v5.8h +; CHECK-NEXT: saddl2 v5.4s, v5.8h, v4.8h ; CHECK-NEXT: saddl v6.4s, v7.4h, v6.4h ; CHECK-NEXT: saddw v4.4s, v17.4s, v4.4h ; CHECK-NEXT: saddl2 v17.4s, v1.8h, v0.8h diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll index 364e8c7b38dac..42ea425f99c0a 100644 --- a/llvm/test/CodeGen/RISCV/mul.ll +++ b/llvm/test/CodeGen/RISCV/mul.ll @@ -1843,3 +1843,152 @@ define i8 @mulsub_demand_2(i8 %x, i8 %y) nounwind { %r = or i8 %a, 240 ret i8 %r } + +define i64 @muland_demand(i64 %x) nounwind { +; RV32I-LABEL: muland_demand: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: andi a0, a0, -8 +; RV32I-NEXT: slli a1, a1, 2 +; RV32I-NEXT: srli a1, a1, 2 +; RV32I-NEXT: li a2, 12 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: call __muldi3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: muland_demand: +; RV32IM: # %bb.0: +; RV32IM-NEXT: andi a0, a0, -8 +; RV32IM-NEXT: li a2, 12 +; RV32IM-NEXT: mul a1, a1, a2 +; RV32IM-NEXT: mulhu a3, a0, a2 +; RV32IM-NEXT: add a1, a3, a1 +; RV32IM-NEXT: mul a0, a0, a2 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: muland_demand: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, -29 +; RV64I-NEXT: srli a1, a1, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: li a1, 12 +; RV64I-NEXT: tail __muldi3 +; +; RV64IM-LABEL: muland_demand: +; RV64IM: # %bb.0: +; RV64IM-NEXT: andi a0, a0, -8 +; RV64IM-NEXT: li a1, 12 +; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: ret + %and = and i64 %x, 4611686018427387896 + %mul = mul i64 %and, 12 + ret i64 %mul +} + +define i64 @mulzext_demand(i32 signext %x) nounwind { +; RV32I-LABEL: mulzext_demand: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a3, 3 +; RV32I-NEXT: li a2, 0 +; RV32I-NEXT: call __muldi3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: mulzext_demand: +; RV32IM: # %bb.0: +; RV32IM-NEXT: slli a1, a0, 1 +; RV32IM-NEXT: add a1, a1, a0 +; RV32IM-NEXT: li a0, 0 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: mulzext_demand: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, 3 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: tail __muldi3 +; +; RV64IM-LABEL: mulzext_demand: +; RV64IM: # %bb.0: +; RV64IM-NEXT: li a1, 3 +; RV64IM-NEXT: slli a1, a1, 32 +; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: ret + %ext = zext i32 %x to i64 + %mul = mul i64 %ext, 12884901888 + ret i64 %mul +} + +define i32 @mulfshl_demand(i32 signext %x) nounwind { +; RV32I-LABEL: mulfshl_demand: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a0, a0, 11 +; RV32I-NEXT: lui a1, 92808 +; RV32I-NEXT: tail __mulsi3 +; +; RV32IM-LABEL: mulfshl_demand: +; RV32IM: # %bb.0: +; RV32IM-NEXT: srli a0, a0, 11 +; RV32IM-NEXT: lui a1, 92808 +; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: mulfshl_demand: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: srliw a0, a0, 11 +; RV64I-NEXT: lui a1, 92808 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: mulfshl_demand: +; RV64IM: # %bb.0: +; RV64IM-NEXT: srliw a0, a0, 11 +; RV64IM-NEXT: lui a1, 92808 +; RV64IM-NEXT: mulw a0, a0, a1 +; RV64IM-NEXT: ret + %fshl = tail call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 21) + %mul = mul i32 %fshl, 380141568 + ret i32 %mul +} + +define i32 @mulor_demand(i32 signext %x, i32 signext %y) nounwind { +; RV32I-LABEL: mulor_demand: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 92808 +; RV32I-NEXT: tail __mulsi3 +; +; RV32IM-LABEL: mulor_demand: +; RV32IM: # %bb.0: +; RV32IM-NEXT: lui a1, 92808 +; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: mulor_demand: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lui a1, 92808 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: mulor_demand: +; RV64IM: # %bb.0: +; RV64IM-NEXT: lui a1, 92808 +; RV64IM-NEXT: mulw a0, a0, a1 +; RV64IM-NEXT: ret + %mul1 = mul i32 %y, 10485760 + %or = or disjoint i32 %mul1, %x + %mul2 = mul i32 %or, 380141568 + ret i32 %mul2 +} diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll index 2db0d40b0ce52..cf7be57ccc901 100644 --- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll @@ -637,8 +637,6 @@ define i64 @zext_mul288(i32 signext %a) { define i64 @zext_mul12884901888(i32 signext %a) { ; RV64I-LABEL: zext_mul12884901888: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: li a1, 3 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: mul a0, a0, a1 @@ -646,8 +644,8 @@ define i64 @zext_mul12884901888(i32 signext %a) { ; ; RV64ZBA-LABEL: zext_mul12884901888: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: slli a0, a0, 32 ; RV64ZBA-NEXT: sh1add a0, a0, a0 +; RV64ZBA-NEXT: slli a0, a0, 32 ; RV64ZBA-NEXT: ret %b = zext i32 %a to i64 %c = mul i64 %b, 12884901888 @@ -658,8 +656,6 @@ define i64 @zext_mul12884901888(i32 signext %a) { define i64 @zext_mul21474836480(i32 signext %a) { ; RV64I-LABEL: zext_mul21474836480: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: li a1, 5 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: mul a0, a0, a1 @@ -667,8 +663,8 @@ define i64 @zext_mul21474836480(i32 signext %a) { ; ; RV64ZBA-LABEL: zext_mul21474836480: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: slli a0, a0, 32 ; RV64ZBA-NEXT: sh2add a0, a0, a0 +; RV64ZBA-NEXT: slli a0, a0, 32 ; RV64ZBA-NEXT: ret %b = zext i32 %a to i64 %c = mul i64 %b, 21474836480 @@ -679,8 +675,6 @@ define i64 @zext_mul21474836480(i32 signext %a) { define i64 @zext_mul38654705664(i32 signext %a) { ; RV64I-LABEL: zext_mul38654705664: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: li a1, 9 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: mul a0, a0, a1 @@ -688,8 +682,8 @@ define i64 @zext_mul38654705664(i32 signext %a) { ; ; RV64ZBA-LABEL: zext_mul38654705664: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: slli a0, a0, 32 ; RV64ZBA-NEXT: sh3add a0, a0, a0 +; RV64ZBA-NEXT: slli a0, a0, 32 ; RV64ZBA-NEXT: ret %b = zext i32 %a to i64 %c = mul i64 %b, 38654705664 diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index dc93c0215a25c..4a568fb2b25c8 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -856,8 +856,6 @@ define i64 @zext_mul288(i32 signext %a) { define i64 @zext_mul12884901888(i32 signext %a) { ; RV64I-LABEL: zext_mul12884901888: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: li a1, 3 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: mul a0, a0, a1 @@ -865,8 +863,8 @@ define i64 @zext_mul12884901888(i32 signext %a) { ; ; RV64ZBA-LABEL: zext_mul12884901888: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: slli a0, a0, 32 ; RV64ZBA-NEXT: sh1add a0, a0, a0 +; RV64ZBA-NEXT: slli a0, a0, 32 ; RV64ZBA-NEXT: ret %b = zext i32 %a to i64 %c = mul i64 %b, 12884901888 @@ -877,8 +875,6 @@ define i64 @zext_mul12884901888(i32 signext %a) { define i64 @zext_mul21474836480(i32 signext %a) { ; RV64I-LABEL: zext_mul21474836480: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: li a1, 5 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: mul a0, a0, a1 @@ -886,8 +882,8 @@ define i64 @zext_mul21474836480(i32 signext %a) { ; ; RV64ZBA-LABEL: zext_mul21474836480: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: slli a0, a0, 32 ; RV64ZBA-NEXT: sh2add a0, a0, a0 +; RV64ZBA-NEXT: slli a0, a0, 32 ; RV64ZBA-NEXT: ret %b = zext i32 %a to i64 %c = mul i64 %b, 21474836480 @@ -898,8 +894,6 @@ define i64 @zext_mul21474836480(i32 signext %a) { define i64 @zext_mul38654705664(i32 signext %a) { ; RV64I-LABEL: zext_mul38654705664: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: li a1, 9 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: mul a0, a0, a1 @@ -907,8 +901,8 @@ define i64 @zext_mul38654705664(i32 signext %a) { ; ; RV64ZBA-LABEL: zext_mul38654705664: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: slli a0, a0, 32 ; RV64ZBA-NEXT: sh3add a0, a0, a0 +; RV64ZBA-NEXT: slli a0, a0, 32 ; RV64ZBA-NEXT: ret %b = zext i32 %a to i64 %c = mul i64 %b, 38654705664 diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll index f707cb31e3ece..8cf78551d28f9 100644 --- a/llvm/test/CodeGen/RISCV/sextw-removal.ll +++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll @@ -1047,25 +1047,25 @@ define signext i32 @bug(i32 signext %x) { ; CHECK-NEXT: seqz a2, a2 ; CHECK-NEXT: slli a3, a2, 3 ; CHECK-NEXT: sllw a1, a1, a3 -; CHECK-NEXT: neg a2, a2 +; CHECK-NEXT: negw a2, a2 ; CHECK-NEXT: andi a2, a2, -8 ; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: srliw a2, a1, 28 ; CHECK-NEXT: seqz a2, a2 ; CHECK-NEXT: slli a3, a2, 2 ; CHECK-NEXT: sllw a1, a1, a3 -; CHECK-NEXT: neg a2, a2 +; CHECK-NEXT: negw a2, a2 ; CHECK-NEXT: andi a2, a2, -4 ; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: srliw a2, a1, 30 ; CHECK-NEXT: seqz a2, a2 ; CHECK-NEXT: slli a3, a2, 1 ; CHECK-NEXT: sllw a1, a1, a3 -; CHECK-NEXT: neg a2, a2 +; CHECK-NEXT: negw a2, a2 ; CHECK-NEXT: andi a2, a2, -2 ; CHECK-NEXT: add a0, a0, a2 -; CHECK-NEXT: srai a1, a1, 31 ; CHECK-NEXT: not a1, a1 +; CHECK-NEXT: srli a1, a1, 31 ; CHECK-NEXT: addw a0, a0, a1 ; CHECK-NEXT: .LBB18_4: # %cleanup ; CHECK-NEXT: ret @@ -1087,28 +1087,27 @@ define signext i32 @bug(i32 signext %x) { ; NOREMOVAL-NEXT: seqz a2, a2 ; NOREMOVAL-NEXT: slli a3, a2, 3 ; NOREMOVAL-NEXT: sllw a1, a1, a3 -; NOREMOVAL-NEXT: neg a2, a2 +; NOREMOVAL-NEXT: negw a2, a2 ; NOREMOVAL-NEXT: andi a2, a2, -8 ; NOREMOVAL-NEXT: add a0, a0, a2 ; NOREMOVAL-NEXT: srliw a2, a1, 28 ; NOREMOVAL-NEXT: seqz a2, a2 ; NOREMOVAL-NEXT: slli a3, a2, 2 ; NOREMOVAL-NEXT: sllw a1, a1, a3 -; NOREMOVAL-NEXT: neg a2, a2 +; NOREMOVAL-NEXT: negw a2, a2 ; NOREMOVAL-NEXT: andi a2, a2, -4 ; NOREMOVAL-NEXT: add a0, a0, a2 ; NOREMOVAL-NEXT: srliw a2, a1, 30 ; NOREMOVAL-NEXT: seqz a2, a2 ; NOREMOVAL-NEXT: slli a3, a2, 1 ; NOREMOVAL-NEXT: sllw a1, a1, a3 -; NOREMOVAL-NEXT: neg a2, a2 +; NOREMOVAL-NEXT: negw a2, a2 ; NOREMOVAL-NEXT: andi a2, a2, -2 ; NOREMOVAL-NEXT: add a0, a0, a2 -; NOREMOVAL-NEXT: srai a1, a1, 31 ; NOREMOVAL-NEXT: not a1, a1 -; NOREMOVAL-NEXT: add a0, a0, a1 +; NOREMOVAL-NEXT: srli a1, a1, 31 +; NOREMOVAL-NEXT: addw a0, a0, a1 ; NOREMOVAL-NEXT: .LBB18_4: # %cleanup -; NOREMOVAL-NEXT: sext.w a0, a0 ; NOREMOVAL-NEXT: ret entry: %tobool.not = icmp eq i32 %x, 0 diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll index 83d7275358ce3..3300d46bf8561 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll @@ -130,26 +130,26 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_zext(<8 x i16> %x) { ; CHECK-NEXT: vmov.i64 q1, #0xffff ; CHECK-NEXT: vand q2, q2, q1 ; CHECK-NEXT: vmov.u16 r3, q0[2] -; CHECK-NEXT: vmov r0, r1, d5 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: add r0, r2 -; CHECK-NEXT: vmov.u16 r2, q0[3] -; CHECK-NEXT: vmov q2[2], q2[0], r3, r2 +; CHECK-NEXT: vmov r0, s10 +; CHECK-NEXT: vmov r1, r2, d4 +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: vmov.u16 r1, q0[3] +; CHECK-NEXT: vmov q2[2], q2[0], r3, r1 ; CHECK-NEXT: vmov.u16 r3, q0[4] ; CHECK-NEXT: vand q2, q2, q1 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: add r0, r2 -; CHECK-NEXT: vmov r2, s10 -; CHECK-NEXT: add r0, r2 -; CHECK-NEXT: vmov.u16 r2, q0[5] -; CHECK-NEXT: vmov q2[2], q2[0], r3, r2 +; CHECK-NEXT: vmov r1, s8 +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: vmov r1, s10 +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: vmov.u16 r1, q0[5] +; CHECK-NEXT: vmov q2[2], q2[0], r3, r1 ; CHECK-NEXT: vand q2, q2, q1 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: add r0, r2 -; CHECK-NEXT: vmov r2, r3, d5 -; CHECK-NEXT: adds r0, r0, r2 +; CHECK-NEXT: vmov r1, s8 +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: vmov r1, r3, d5 +; CHECK-NEXT: adds r0, r0, r1 +; CHECK-NEXT: adc.w r1, r2, r3 ; CHECK-NEXT: vmov.u16 r2, q0[7] -; CHECK-NEXT: adcs r1, r3 ; CHECK-NEXT: vmov.u16 r3, q0[6] ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 ; CHECK-NEXT: vand q0, q0, q1 @@ -228,8 +228,8 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i64 q1, #0xffff ; CHECK-NEXT: vand q0, q0, q1 -; CHECK-NEXT: vmov r0, r1, d1 -; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: vmov r2, r1, d0 ; CHECK-NEXT: add r0, r2 ; CHECK-NEXT: bx lr entry: @@ -397,26 +397,26 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_zext(<16 x i8> %x) { ; CHECK-NEXT: vmov.i64 q1, #0xff ; CHECK-NEXT: vand q2, q2, q1 ; CHECK-NEXT: vmov.u8 r3, q0[2] -; CHECK-NEXT: vmov r0, r1, d5 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: add r0, r2 -; CHECK-NEXT: vmov.u8 r2, q0[3] -; CHECK-NEXT: vmov q2[2], q2[0], r3, r2 +; CHECK-NEXT: vmov r0, s10 +; CHECK-NEXT: vmov r1, r2, d4 +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: vmov.u8 r1, q0[3] +; CHECK-NEXT: vmov q2[2], q2[0], r3, r1 ; CHECK-NEXT: vmov.u8 r3, q0[4] ; CHECK-NEXT: vand q2, q2, q1 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: add r0, r2 -; CHECK-NEXT: vmov r2, s10 -; CHECK-NEXT: add r0, r2 -; CHECK-NEXT: vmov.u8 r2, q0[5] -; CHECK-NEXT: vmov q2[2], q2[0], r3, r2 +; CHECK-NEXT: vmov r1, s8 +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: vmov r1, s10 +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: vmov.u8 r1, q0[5] +; CHECK-NEXT: vmov q2[2], q2[0], r3, r1 ; CHECK-NEXT: vand q2, q2, q1 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: add r0, r2 -; CHECK-NEXT: vmov r2, r3, d5 -; CHECK-NEXT: adds r0, r0, r2 +; CHECK-NEXT: vmov r1, s8 +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: vmov r1, r3, d5 +; CHECK-NEXT: adds r0, r0, r1 +; CHECK-NEXT: adc.w r1, r2, r3 ; CHECK-NEXT: vmov.u8 r2, q0[7] -; CHECK-NEXT: adcs r1, r3 ; CHECK-NEXT: vmov.u8 r3, q0[6] ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2 ; CHECK-NEXT: vand q2, q2, q1 @@ -540,26 +540,26 @@ define arm_aapcs_vfpcc i64 @add_v8i8_v8i64_zext(<8 x i8> %x) { ; CHECK-NEXT: vmov q2[2], q2[0], r1, r0 ; CHECK-NEXT: vmov.u16 r3, q0[2] ; CHECK-NEXT: vand q2, q2, q1 -; CHECK-NEXT: vmov r0, r1, d5 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: add r0, r2 -; CHECK-NEXT: vmov.u16 r2, q0[3] -; CHECK-NEXT: vmov q2[2], q2[0], r3, r2 +; CHECK-NEXT: vmov r0, s10 +; CHECK-NEXT: vmov r1, r2, d4 +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: vmov.u16 r1, q0[3] +; CHECK-NEXT: vmov q2[2], q2[0], r3, r1 ; CHECK-NEXT: vmov.u16 r3, q0[4] ; CHECK-NEXT: vand q2, q2, q1 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: add r0, r2 -; CHECK-NEXT: vmov r2, s10 -; CHECK-NEXT: add r0, r2 -; CHECK-NEXT: vmov.u16 r2, q0[5] -; CHECK-NEXT: vmov q2[2], q2[0], r3, r2 +; CHECK-NEXT: vmov r1, s8 +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: vmov r1, s10 +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: vmov.u16 r1, q0[5] +; CHECK-NEXT: vmov q2[2], q2[0], r3, r1 ; CHECK-NEXT: vand q2, q2, q1 -; CHECK-NEXT: vmov r2, s8 -; CHECK-NEXT: add r0, r2 -; CHECK-NEXT: vmov r2, r3, d5 -; CHECK-NEXT: adds r0, r0, r2 +; CHECK-NEXT: vmov r1, s8 +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: vmov r1, r3, d5 +; CHECK-NEXT: adds r0, r0, r1 +; CHECK-NEXT: adc.w r1, r2, r3 ; CHECK-NEXT: vmov.u16 r2, q0[7] -; CHECK-NEXT: adcs r1, r3 ; CHECK-NEXT: vmov.u16 r3, q0[6] ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 ; CHECK-NEXT: vand q0, q0, q1 @@ -648,8 +648,8 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_zext(<2 x i8> %x) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i64 q1, #0xff ; CHECK-NEXT: vand q0, q0, q1 -; CHECK-NEXT: vmov r0, r1, d1 -; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: vmov r2, r1, d0 ; CHECK-NEXT: add r0, r2 ; CHECK-NEXT: bx lr entry: @@ -834,8 +834,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_acc_zext(<8 x i16> %x, i64 %a) { ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2 ; CHECK-NEXT: vmov.i64 q1, #0xffff ; CHECK-NEXT: vand q2, q2, q1 -; CHECK-NEXT: vmov r2, r12, d5 -; CHECK-NEXT: vmov r3, s8 +; CHECK-NEXT: vmov r2, s10 +; CHECK-NEXT: vmov r3, r12, d4 ; CHECK-NEXT: add.w lr, r3, r2 ; CHECK-NEXT: vmov.u16 r3, q0[3] ; CHECK-NEXT: vmov.u16 r2, q0[2] @@ -943,8 +943,8 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, i64 %a) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i64 q1, #0xffff ; CHECK-NEXT: vand q0, q0, q1 -; CHECK-NEXT: vmov r2, r12, d1 -; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vmov r2, s2 +; CHECK-NEXT: vmov r3, r12, d0 ; CHECK-NEXT: add r2, r3 ; CHECK-NEXT: adds r0, r0, r2 ; CHECK-NEXT: adc.w r1, r1, r12 @@ -1130,8 +1130,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, i64 %a) { ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2 ; CHECK-NEXT: vmov.i64 q1, #0xff ; CHECK-NEXT: vand q2, q2, q1 -; CHECK-NEXT: vmov r2, r12, d5 -; CHECK-NEXT: vmov r3, s8 +; CHECK-NEXT: vmov r2, s10 +; CHECK-NEXT: vmov r3, r12, d4 ; CHECK-NEXT: add.w lr, r3, r2 ; CHECK-NEXT: vmov.u8 r3, q0[3] ; CHECK-NEXT: vmov.u8 r2, q0[2] @@ -1283,8 +1283,8 @@ define arm_aapcs_vfpcc i64 @add_v8i8_v8i64_acc_zext(<8 x i8> %x, i64 %a) { ; CHECK-NEXT: vmov.u16 r3, q0[0] ; CHECK-NEXT: vmov q2[2], q2[0], r3, r2 ; CHECK-NEXT: vand q2, q2, q1 -; CHECK-NEXT: vmov r2, r12, d5 -; CHECK-NEXT: vmov r3, s8 +; CHECK-NEXT: vmov r2, s10 +; CHECK-NEXT: vmov r3, r12, d4 ; CHECK-NEXT: add.w lr, r3, r2 ; CHECK-NEXT: vmov.u16 r3, q0[3] ; CHECK-NEXT: vmov.u16 r2, q0[2] @@ -1402,8 +1402,8 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, i64 %a) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i64 q1, #0xff ; CHECK-NEXT: vand q0, q0, q1 -; CHECK-NEXT: vmov r2, r12, d1 -; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vmov r2, s2 +; CHECK-NEXT: vmov r3, r12, d0 ; CHECK-NEXT: add r2, r3 ; CHECK-NEXT: adds r0, r0, r2 ; CHECK-NEXT: adc.w r1, r1, r12 diff --git a/llvm/test/CodeGen/X86/combine-srem.ll b/llvm/test/CodeGen/X86/combine-srem.ll index 49ce2455ae8c7..4ed00a9d66bd3 100644 --- a/llvm/test/CodeGen/X86/combine-srem.ll +++ b/llvm/test/CodeGen/X86/combine-srem.ll @@ -329,7 +329,7 @@ define <4 x i32> @combine_vec_srem_by_pow2b_neg(<4 x i32> %x) { ; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7] ; SSE-NEXT: movdqa %xmm1, %xmm2 ; SSE-NEXT: psrad $3, %xmm2 -; SSE-NEXT: psrad $1, %xmm1 +; SSE-NEXT: psrld $1, %xmm1 ; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] ; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] ; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -351,7 +351,7 @@ define <4 x i32> @combine_vec_srem_by_pow2b_neg(<4 x i32> %x) { ; AVX1-NEXT: vpsrad $2, %xmm1, %xmm3 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] ; AVX1-NEXT: vpsrad $3, %xmm1, %xmm3 -; AVX1-NEXT: vpsrad $1, %xmm1, %xmm1 +; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7] ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] ; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 diff --git a/llvm/test/CodeGen/X86/pmul.ll b/llvm/test/CodeGen/X86/pmul.ll index dcded7a877abb..1f82c4a5a2d92 100644 --- a/llvm/test/CodeGen/X86/pmul.ll +++ b/llvm/test/CodeGen/X86/pmul.ll @@ -1173,13 +1173,14 @@ define <4 x i32> @mul_v4i64_zero_lower(<4 x i32> %val1, <4 x i64> %val2) { ; ; SSE41-LABEL: mul_v4i64_zero_lower: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,1,3,3] +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,3,3] ; SSE41-NEXT: psrlq $32, %xmm2 -; SSE41-NEXT: pmuludq %xmm3, %xmm2 +; SSE41-NEXT: pmuludq %xmm0, %xmm2 ; SSE41-NEXT: psrlq $32, %xmm1 -; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; SSE41-NEXT: pmuludq %xmm1, %xmm0 -; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2] +; SSE41-NEXT: pmuludq %xmm1, %xmm3 +; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2],xmm2[0,2] +; SSE41-NEXT: movaps %xmm3, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: mul_v4i64_zero_lower: diff --git a/llvm/test/CodeGen/X86/shrink_vmul.ll b/llvm/test/CodeGen/X86/shrink_vmul.ll index 62051d1709940..f3f7f0515e306 100644 --- a/llvm/test/CodeGen/X86/shrink_vmul.ll +++ b/llvm/test/CodeGen/X86/shrink_vmul.ll @@ -1863,7 +1863,7 @@ define void @mul_2xi16_varconst3(ptr nocapture readonly %a, i64 %index) { ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-SSE-NEXT: movl c, %edx ; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: psrld $16, %xmm0 +; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] ; X86-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; X86-SSE-NEXT: psllq $32, %xmm0 ; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4) @@ -1884,7 +1884,7 @@ define void @mul_2xi16_varconst3(ptr nocapture readonly %a, i64 %index) { ; X64-SSE: # %bb.0: # %entry ; X64-SSE-NEXT: movq c(%rip), %rax ; X64-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64-SSE-NEXT: psrld $16, %xmm0 +; X64-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] ; X64-SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; X64-SSE-NEXT: psllq $32, %xmm0 ; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4)