diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 7b00b2514c4ef..56c9ba67bb35e 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -16984,6 +16984,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return Op0.getOperand(0); } + if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() && + cast<LoadSDNode>(Op0)->isSimple()) { + MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits()); + auto *LN0 = cast<LoadSDNode>(Op0); + SDValue Load = + DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), + LN0->getBasePtr(), IVT, LN0->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1)); + return Load; + } + // This is a target-specific version of a DAGCombine performed in // DAGCombiner::visitBITCAST. It performs the equivalent of: // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) diff --git a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll index 8e2fdfc4ba94c..ca40ba0399973 100644 --- a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll +++ b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll @@ -246,32 +246,28 @@ define fastcc half @callee_half_32(<32 x half> %A) nounwind { define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX32-LABEL: caller_half_32: ; ZHINX32: # %bb.0: -; ZHINX32-NEXT: addi sp, sp, -112 -; ZHINX32-NEXT: sw ra, 108(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s0, 104(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s1, 100(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s2, 96(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s3, 92(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s4, 88(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s5, 84(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s6, 80(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s7, 76(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s8, 72(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s9, 68(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s10, 64(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s11, 60(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lh t0, 124(sp) -; ZHINX32-NEXT: sw t0, 56(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lh t0, 120(sp) -; ZHINX32-NEXT: sw t0, 52(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lh t0, 116(sp) -; ZHINX32-NEXT: sw t0, 48(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: addi sp, sp, -96 +; ZHINX32-NEXT: sw ra, 92(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s0, 88(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s1, 84(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s2, 80(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s3, 76(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s4, 72(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s5, 68(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s6, 64(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s7, 60(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s8, 56(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s9, 52(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s10, 48(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s11, 44(sp) # 4-byte Folded Spill ; ZHINX32-NEXT: lh t0, 112(sp) -; ZHINX32-NEXT: sw t0, 44(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lh t6, 128(sp) -; ZHINX32-NEXT: lh t5, 132(sp) -; ZHINX32-NEXT: lh t4, 136(sp) -; ZHINX32-NEXT: lh s0, 140(sp) +; ZHINX32-NEXT: lh t1, 116(sp) +; ZHINX32-NEXT: lh t2, 120(sp) +; ZHINX32-NEXT: lh s0, 124(sp) +; ZHINX32-NEXT: lh t3, 128(sp) +; ZHINX32-NEXT: lh t4, 132(sp) +; ZHINX32-NEXT: lh t5, 136(sp) +; ZHINX32-NEXT: lh t6, 140(sp) ; ZHINX32-NEXT: lh s1, 
144(sp) ; ZHINX32-NEXT: lh s2, 148(sp) ; ZHINX32-NEXT: lh s3, 152(sp) @@ -284,79 +280,71 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX32-NEXT: lh s10, 180(sp) ; ZHINX32-NEXT: lh s11, 184(sp) ; ZHINX32-NEXT: lh ra, 188(sp) -; ZHINX32-NEXT: lh t3, 192(sp) -; ZHINX32-NEXT: lh t2, 196(sp) -; ZHINX32-NEXT: lh t1, 200(sp) -; ZHINX32-NEXT: lh t0, 204(sp) -; ZHINX32-NEXT: sh t0, 38(sp) -; ZHINX32-NEXT: sh t1, 36(sp) -; ZHINX32-NEXT: sh t2, 34(sp) -; ZHINX32-NEXT: sh t3, 32(sp) -; ZHINX32-NEXT: sh ra, 30(sp) -; ZHINX32-NEXT: sh s11, 28(sp) -; ZHINX32-NEXT: sh s10, 26(sp) -; ZHINX32-NEXT: sh s9, 24(sp) -; ZHINX32-NEXT: sh s8, 22(sp) -; ZHINX32-NEXT: sh s7, 20(sp) -; ZHINX32-NEXT: sh s6, 18(sp) -; ZHINX32-NEXT: sh s5, 16(sp) -; ZHINX32-NEXT: sh s4, 14(sp) -; ZHINX32-NEXT: sh s3, 12(sp) -; ZHINX32-NEXT: sh s2, 10(sp) -; ZHINX32-NEXT: sh s1, 8(sp) +; ZHINX32-NEXT: sh ra, 38(sp) +; ZHINX32-NEXT: sh s11, 36(sp) +; ZHINX32-NEXT: sh s10, 34(sp) +; ZHINX32-NEXT: sh s9, 32(sp) +; ZHINX32-NEXT: sh s8, 30(sp) +; ZHINX32-NEXT: sh s7, 28(sp) +; ZHINX32-NEXT: sh s6, 26(sp) +; ZHINX32-NEXT: sh s5, 24(sp) +; ZHINX32-NEXT: sh s4, 22(sp) +; ZHINX32-NEXT: sh s3, 20(sp) +; ZHINX32-NEXT: sh s2, 18(sp) +; ZHINX32-NEXT: sh s1, 16(sp) +; ZHINX32-NEXT: sh t6, 14(sp) +; ZHINX32-NEXT: sh t5, 12(sp) +; ZHINX32-NEXT: sh t4, 10(sp) +; ZHINX32-NEXT: sh t3, 8(sp) +; ZHINX32-NEXT: lh t3, 96(sp) +; ZHINX32-NEXT: lh t4, 100(sp) +; ZHINX32-NEXT: lh t5, 104(sp) +; ZHINX32-NEXT: lh t6, 108(sp) ; ZHINX32-NEXT: sh s0, 6(sp) -; ZHINX32-NEXT: sh t4, 4(sp) -; ZHINX32-NEXT: sh t5, 2(sp) -; ZHINX32-NEXT: sh t6, 0(sp) -; ZHINX32-NEXT: lw t3, 44(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw t4, 48(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw t5, 52(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw t6, 56(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: sh t2, 4(sp) +; ZHINX32-NEXT: sh t1, 2(sp) +; ZHINX32-NEXT: sh t0, 0(sp) ; ZHINX32-NEXT: call callee_half_32 -; ZHINX32-NEXT: lw ra, 108(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s0, 104(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s1, 100(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s2, 96(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s3, 92(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s4, 88(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s5, 84(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s6, 80(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s7, 76(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s8, 72(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s9, 68(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s10, 64(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s11, 60(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: addi sp, sp, 112 +; ZHINX32-NEXT: lw ra, 92(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s0, 88(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s1, 84(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s2, 80(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s3, 76(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s4, 72(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s5, 68(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s6, 64(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s7, 60(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s8, 56(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s9, 52(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s10, 48(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s11, 44(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: addi sp, sp, 96 ; ZHINX32-NEXT: ret ; ; ZHINX64-LABEL: caller_half_32: ; ZHINX64: # %bb.0: -; ZHINX64-NEXT: addi sp, sp, -176 -; 
ZHINX64-NEXT: sd ra, 168(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s0, 160(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s1, 152(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s2, 144(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s3, 136(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s4, 128(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s5, 120(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s6, 112(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s7, 104(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s8, 96(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s9, 88(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s10, 80(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s11, 72(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lh t0, 200(sp) -; ZHINX64-NEXT: sd t0, 64(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lh t0, 192(sp) -; ZHINX64-NEXT: sd t0, 56(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lh t0, 184(sp) -; ZHINX64-NEXT: sd t0, 48(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: addi sp, sp, -144 +; ZHINX64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s0, 128(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s1, 120(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s2, 112(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s3, 104(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s4, 96(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s5, 88(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s6, 80(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s7, 72(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s8, 64(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s9, 56(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s10, 48(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s11, 40(sp) # 8-byte Folded Spill ; ZHINX64-NEXT: lh t0, 176(sp) -; ZHINX64-NEXT: sd t0, 40(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lh t6, 208(sp) -; ZHINX64-NEXT: lh t5, 216(sp) -; ZHINX64-NEXT: lh t4, 224(sp) -; ZHINX64-NEXT: lh s0, 232(sp) +; ZHINX64-NEXT: lh t1, 184(sp) +; ZHINX64-NEXT: lh t2, 192(sp) +; ZHINX64-NEXT: lh s0, 200(sp) +; ZHINX64-NEXT: lh t3, 208(sp) +; ZHINX64-NEXT: lh t4, 216(sp) +; ZHINX64-NEXT: lh t5, 224(sp) +; ZHINX64-NEXT: lh t6, 232(sp) ; ZHINX64-NEXT: lh s1, 240(sp) ; ZHINX64-NEXT: lh s2, 248(sp) ; ZHINX64-NEXT: lh s3, 256(sp) @@ -369,49 +357,45 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX64-NEXT: lh s10, 312(sp) ; ZHINX64-NEXT: lh s11, 320(sp) ; ZHINX64-NEXT: lh ra, 328(sp) -; ZHINX64-NEXT: lh t3, 336(sp) -; ZHINX64-NEXT: lh t2, 344(sp) -; ZHINX64-NEXT: lh t1, 352(sp) -; ZHINX64-NEXT: lh t0, 360(sp) -; ZHINX64-NEXT: sh t0, 38(sp) -; ZHINX64-NEXT: sh t1, 36(sp) -; ZHINX64-NEXT: sh t2, 34(sp) -; ZHINX64-NEXT: sh t3, 32(sp) -; ZHINX64-NEXT: sh ra, 30(sp) -; ZHINX64-NEXT: sh s11, 28(sp) -; ZHINX64-NEXT: sh s10, 26(sp) -; ZHINX64-NEXT: sh s9, 24(sp) -; ZHINX64-NEXT: sh s8, 22(sp) -; ZHINX64-NEXT: sh s7, 20(sp) -; ZHINX64-NEXT: sh s6, 18(sp) -; ZHINX64-NEXT: sh s5, 16(sp) -; ZHINX64-NEXT: sh s4, 14(sp) -; ZHINX64-NEXT: sh s3, 12(sp) -; ZHINX64-NEXT: sh s2, 10(sp) -; ZHINX64-NEXT: sh s1, 8(sp) +; ZHINX64-NEXT: sh ra, 38(sp) +; ZHINX64-NEXT: sh s11, 36(sp) +; ZHINX64-NEXT: sh s10, 34(sp) +; ZHINX64-NEXT: sh s9, 32(sp) +; ZHINX64-NEXT: sh s8, 30(sp) +; ZHINX64-NEXT: sh s7, 28(sp) +; ZHINX64-NEXT: sh s6, 26(sp) +; ZHINX64-NEXT: sh s5, 24(sp) +; ZHINX64-NEXT: sh s4, 22(sp) +; ZHINX64-NEXT: sh s3, 20(sp) +; ZHINX64-NEXT: sh s2, 18(sp) +; ZHINX64-NEXT: sh s1, 16(sp) +; ZHINX64-NEXT: sh t6, 14(sp) +; ZHINX64-NEXT: sh t5, 12(sp) +; ZHINX64-NEXT: sh t4, 10(sp) +; ZHINX64-NEXT: sh t3, 8(sp) +; ZHINX64-NEXT: lh t3, 144(sp) +; ZHINX64-NEXT: 
lh t4, 152(sp) +; ZHINX64-NEXT: lh t5, 160(sp) +; ZHINX64-NEXT: lh t6, 168(sp) ; ZHINX64-NEXT: sh s0, 6(sp) -; ZHINX64-NEXT: sh t4, 4(sp) -; ZHINX64-NEXT: sh t5, 2(sp) -; ZHINX64-NEXT: sh t6, 0(sp) -; ZHINX64-NEXT: ld t3, 40(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t4, 48(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t5, 56(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t6, 64(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: sh t2, 4(sp) +; ZHINX64-NEXT: sh t1, 2(sp) +; ZHINX64-NEXT: sh t0, 0(sp) ; ZHINX64-NEXT: call callee_half_32 -; ZHINX64-NEXT: ld ra, 168(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s0, 160(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s1, 152(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s2, 144(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s3, 136(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s4, 128(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s5, 120(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s6, 112(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s7, 104(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s8, 96(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s9, 88(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s10, 80(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s11, 72(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: addi sp, sp, 176 +; ZHINX64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s0, 128(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s1, 120(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s2, 112(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s3, 104(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s4, 96(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s6, 80(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s7, 72(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s8, 64(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s9, 56(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s10, 48(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s11, 40(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: addi sp, sp, 144 ; ZHINX64-NEXT: ret ; ; ZFINX32-LABEL: caller_half_32: @@ -917,32 +901,28 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ; ZHINX64-LABEL: caller_float_32: ; ZHINX64: # %bb.0: -; ZHINX64-NEXT: addi sp, sp, -224 -; ZHINX64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s3, 184(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lw t0, 248(sp) -; ZHINX64-NEXT: sd t0, 112(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lw t0, 240(sp) -; ZHINX64-NEXT: sd t0, 104(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lw t0, 232(sp) -; ZHINX64-NEXT: sd t0, 96(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: addi sp, sp, -192 +; ZHINX64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s1, 168(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s2, 160(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s3, 152(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s4, 144(sp) 
# 8-byte Folded Spill +; ZHINX64-NEXT: sd s5, 136(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s6, 128(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s7, 120(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s8, 112(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s9, 104(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s10, 96(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s11, 88(sp) # 8-byte Folded Spill ; ZHINX64-NEXT: lw t0, 224(sp) -; ZHINX64-NEXT: sd t0, 88(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lw t6, 256(sp) -; ZHINX64-NEXT: lw t5, 264(sp) -; ZHINX64-NEXT: lw t4, 272(sp) -; ZHINX64-NEXT: lw s0, 280(sp) +; ZHINX64-NEXT: lw t1, 232(sp) +; ZHINX64-NEXT: lw t2, 240(sp) +; ZHINX64-NEXT: lw s0, 248(sp) +; ZHINX64-NEXT: lw t3, 256(sp) +; ZHINX64-NEXT: lw t4, 264(sp) +; ZHINX64-NEXT: lw t5, 272(sp) +; ZHINX64-NEXT: lw t6, 280(sp) ; ZHINX64-NEXT: lw s1, 288(sp) ; ZHINX64-NEXT: lw s2, 296(sp) ; ZHINX64-NEXT: lw s3, 304(sp) @@ -955,49 +935,45 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZHINX64-NEXT: lw s10, 360(sp) ; ZHINX64-NEXT: lw s11, 368(sp) ; ZHINX64-NEXT: lw ra, 376(sp) -; ZHINX64-NEXT: lw t3, 384(sp) -; ZHINX64-NEXT: lw t2, 392(sp) -; ZHINX64-NEXT: lw t1, 400(sp) -; ZHINX64-NEXT: lw t0, 408(sp) -; ZHINX64-NEXT: sw t0, 76(sp) -; ZHINX64-NEXT: sw t1, 72(sp) -; ZHINX64-NEXT: sw t2, 68(sp) -; ZHINX64-NEXT: sw t3, 64(sp) -; ZHINX64-NEXT: sw ra, 60(sp) -; ZHINX64-NEXT: sw s11, 56(sp) -; ZHINX64-NEXT: sw s10, 52(sp) -; ZHINX64-NEXT: sw s9, 48(sp) -; ZHINX64-NEXT: sw s8, 44(sp) -; ZHINX64-NEXT: sw s7, 40(sp) -; ZHINX64-NEXT: sw s6, 36(sp) -; ZHINX64-NEXT: sw s5, 32(sp) -; ZHINX64-NEXT: sw s4, 28(sp) -; ZHINX64-NEXT: sw s3, 24(sp) -; ZHINX64-NEXT: sw s2, 20(sp) -; ZHINX64-NEXT: sw s1, 16(sp) +; ZHINX64-NEXT: sw ra, 76(sp) +; ZHINX64-NEXT: sw s11, 72(sp) +; ZHINX64-NEXT: sw s10, 68(sp) +; ZHINX64-NEXT: sw s9, 64(sp) +; ZHINX64-NEXT: sw s8, 60(sp) +; ZHINX64-NEXT: sw s7, 56(sp) +; ZHINX64-NEXT: sw s6, 52(sp) +; ZHINX64-NEXT: sw s5, 48(sp) +; ZHINX64-NEXT: sw s4, 44(sp) +; ZHINX64-NEXT: sw s3, 40(sp) +; ZHINX64-NEXT: sw s2, 36(sp) +; ZHINX64-NEXT: sw s1, 32(sp) +; ZHINX64-NEXT: sw t6, 28(sp) +; ZHINX64-NEXT: sw t5, 24(sp) +; ZHINX64-NEXT: sw t4, 20(sp) +; ZHINX64-NEXT: sw t3, 16(sp) +; ZHINX64-NEXT: lw t3, 192(sp) +; ZHINX64-NEXT: lw t4, 200(sp) +; ZHINX64-NEXT: lw t5, 208(sp) +; ZHINX64-NEXT: lw t6, 216(sp) ; ZHINX64-NEXT: sw s0, 12(sp) -; ZHINX64-NEXT: sw t4, 8(sp) -; ZHINX64-NEXT: sw t5, 4(sp) -; ZHINX64-NEXT: sw t6, 0(sp) -; ZHINX64-NEXT: ld t3, 88(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t4, 96(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t5, 104(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t6, 112(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: sw t2, 8(sp) +; ZHINX64-NEXT: sw t1, 4(sp) +; ZHINX64-NEXT: sw t0, 0(sp) ; ZHINX64-NEXT: call callee_float_32 -; ZHINX64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s2, 192(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s5, 168(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s8, 144(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s9, 136(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: 
addi sp, sp, 224 +; ZHINX64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s1, 168(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s2, 160(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s3, 152(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s4, 144(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s5, 136(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s6, 128(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s7, 120(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s8, 112(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s9, 104(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s10, 96(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s11, 88(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: addi sp, sp, 192 ; ZHINX64-NEXT: ret ; ; ZFINX32-LABEL: caller_float_32: @@ -1087,32 +1063,28 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ; ZFINX64-LABEL: caller_float_32: ; ZFINX64: # %bb.0: -; ZFINX64-NEXT: addi sp, sp, -224 -; ZFINX64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s3, 184(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: lw t0, 248(sp) -; ZFINX64-NEXT: sd t0, 112(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: lw t0, 240(sp) -; ZFINX64-NEXT: sd t0, 104(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: lw t0, 232(sp) -; ZFINX64-NEXT: sd t0, 96(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: addi sp, sp, -192 +; ZFINX64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s1, 168(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s2, 160(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s3, 152(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s4, 144(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s5, 136(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s6, 128(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s7, 120(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s8, 112(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s9, 104(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s10, 96(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s11, 88(sp) # 8-byte Folded Spill ; ZFINX64-NEXT: lw t0, 224(sp) -; ZFINX64-NEXT: sd t0, 88(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: lw t6, 256(sp) -; ZFINX64-NEXT: lw t5, 264(sp) -; ZFINX64-NEXT: lw t4, 272(sp) -; ZFINX64-NEXT: lw s0, 280(sp) +; ZFINX64-NEXT: lw t1, 232(sp) +; ZFINX64-NEXT: lw t2, 240(sp) +; ZFINX64-NEXT: lw s0, 248(sp) +; ZFINX64-NEXT: lw t3, 256(sp) +; ZFINX64-NEXT: lw t4, 264(sp) +; ZFINX64-NEXT: lw t5, 272(sp) +; ZFINX64-NEXT: lw t6, 280(sp) ; ZFINX64-NEXT: lw s1, 288(sp) ; ZFINX64-NEXT: lw s2, 296(sp) ; ZFINX64-NEXT: lw s3, 304(sp) @@ -1125,49 +1097,45 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZFINX64-NEXT: lw s10, 360(sp) ; ZFINX64-NEXT: lw s11, 368(sp) ; ZFINX64-NEXT: lw ra, 376(sp) -; ZFINX64-NEXT: lw t3, 384(sp) -; ZFINX64-NEXT: lw t2, 392(sp) -; ZFINX64-NEXT: lw t1, 400(sp) -; ZFINX64-NEXT: lw t0, 408(sp) -; ZFINX64-NEXT: sw t0, 
76(sp) -; ZFINX64-NEXT: sw t1, 72(sp) -; ZFINX64-NEXT: sw t2, 68(sp) -; ZFINX64-NEXT: sw t3, 64(sp) -; ZFINX64-NEXT: sw ra, 60(sp) -; ZFINX64-NEXT: sw s11, 56(sp) -; ZFINX64-NEXT: sw s10, 52(sp) -; ZFINX64-NEXT: sw s9, 48(sp) -; ZFINX64-NEXT: sw s8, 44(sp) -; ZFINX64-NEXT: sw s7, 40(sp) -; ZFINX64-NEXT: sw s6, 36(sp) -; ZFINX64-NEXT: sw s5, 32(sp) -; ZFINX64-NEXT: sw s4, 28(sp) -; ZFINX64-NEXT: sw s3, 24(sp) -; ZFINX64-NEXT: sw s2, 20(sp) -; ZFINX64-NEXT: sw s1, 16(sp) +; ZFINX64-NEXT: sw ra, 76(sp) +; ZFINX64-NEXT: sw s11, 72(sp) +; ZFINX64-NEXT: sw s10, 68(sp) +; ZFINX64-NEXT: sw s9, 64(sp) +; ZFINX64-NEXT: sw s8, 60(sp) +; ZFINX64-NEXT: sw s7, 56(sp) +; ZFINX64-NEXT: sw s6, 52(sp) +; ZFINX64-NEXT: sw s5, 48(sp) +; ZFINX64-NEXT: sw s4, 44(sp) +; ZFINX64-NEXT: sw s3, 40(sp) +; ZFINX64-NEXT: sw s2, 36(sp) +; ZFINX64-NEXT: sw s1, 32(sp) +; ZFINX64-NEXT: sw t6, 28(sp) +; ZFINX64-NEXT: sw t5, 24(sp) +; ZFINX64-NEXT: sw t4, 20(sp) +; ZFINX64-NEXT: sw t3, 16(sp) +; ZFINX64-NEXT: lw t3, 192(sp) +; ZFINX64-NEXT: lw t4, 200(sp) +; ZFINX64-NEXT: lw t5, 208(sp) +; ZFINX64-NEXT: lw t6, 216(sp) ; ZFINX64-NEXT: sw s0, 12(sp) -; ZFINX64-NEXT: sw t4, 8(sp) -; ZFINX64-NEXT: sw t5, 4(sp) -; ZFINX64-NEXT: sw t6, 0(sp) -; ZFINX64-NEXT: ld t3, 88(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld t4, 96(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld t5, 104(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld t6, 112(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: sw t2, 8(sp) +; ZFINX64-NEXT: sw t1, 4(sp) +; ZFINX64-NEXT: sw t0, 0(sp) ; ZFINX64-NEXT: call callee_float_32 -; ZFINX64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s2, 192(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s5, 168(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s8, 144(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s9, 136(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: addi sp, sp, 224 +; ZFINX64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s1, 168(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s2, 160(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s3, 152(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s4, 144(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s5, 136(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s6, 128(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s7, 120(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s8, 112(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s9, 104(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s10, 96(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s11, 88(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: addi sp, sp, 192 ; ZFINX64-NEXT: ret ; ; ZDINX32-LABEL: caller_float_32: @@ -1257,32 +1225,28 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ; ZDINX64-LABEL: caller_float_32: ; ZDINX64: # %bb.0: -; ZDINX64-NEXT: addi sp, sp, -224 -; ZDINX64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s3, 184(sp) # 8-byte 
Folded Spill -; ZDINX64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: lw t0, 248(sp) -; ZDINX64-NEXT: sd t0, 112(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: lw t0, 240(sp) -; ZDINX64-NEXT: sd t0, 104(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: lw t0, 232(sp) -; ZDINX64-NEXT: sd t0, 96(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: addi sp, sp, -192 +; ZDINX64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s1, 168(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s2, 160(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s3, 152(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s4, 144(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s5, 136(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s6, 128(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s7, 120(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s8, 112(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s9, 104(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s10, 96(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s11, 88(sp) # 8-byte Folded Spill ; ZDINX64-NEXT: lw t0, 224(sp) -; ZDINX64-NEXT: sd t0, 88(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: lw t6, 256(sp) -; ZDINX64-NEXT: lw t5, 264(sp) -; ZDINX64-NEXT: lw t4, 272(sp) -; ZDINX64-NEXT: lw s0, 280(sp) +; ZDINX64-NEXT: lw t1, 232(sp) +; ZDINX64-NEXT: lw t2, 240(sp) +; ZDINX64-NEXT: lw s0, 248(sp) +; ZDINX64-NEXT: lw t3, 256(sp) +; ZDINX64-NEXT: lw t4, 264(sp) +; ZDINX64-NEXT: lw t5, 272(sp) +; ZDINX64-NEXT: lw t6, 280(sp) ; ZDINX64-NEXT: lw s1, 288(sp) ; ZDINX64-NEXT: lw s2, 296(sp) ; ZDINX64-NEXT: lw s3, 304(sp) @@ -1295,49 +1259,45 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZDINX64-NEXT: lw s10, 360(sp) ; ZDINX64-NEXT: lw s11, 368(sp) ; ZDINX64-NEXT: lw ra, 376(sp) -; ZDINX64-NEXT: lw t3, 384(sp) -; ZDINX64-NEXT: lw t2, 392(sp) -; ZDINX64-NEXT: lw t1, 400(sp) -; ZDINX64-NEXT: lw t0, 408(sp) -; ZDINX64-NEXT: sw t0, 76(sp) -; ZDINX64-NEXT: sw t1, 72(sp) -; ZDINX64-NEXT: sw t2, 68(sp) -; ZDINX64-NEXT: sw t3, 64(sp) -; ZDINX64-NEXT: sw ra, 60(sp) -; ZDINX64-NEXT: sw s11, 56(sp) -; ZDINX64-NEXT: sw s10, 52(sp) -; ZDINX64-NEXT: sw s9, 48(sp) -; ZDINX64-NEXT: sw s8, 44(sp) -; ZDINX64-NEXT: sw s7, 40(sp) -; ZDINX64-NEXT: sw s6, 36(sp) -; ZDINX64-NEXT: sw s5, 32(sp) -; ZDINX64-NEXT: sw s4, 28(sp) -; ZDINX64-NEXT: sw s3, 24(sp) -; ZDINX64-NEXT: sw s2, 20(sp) -; ZDINX64-NEXT: sw s1, 16(sp) +; ZDINX64-NEXT: sw ra, 76(sp) +; ZDINX64-NEXT: sw s11, 72(sp) +; ZDINX64-NEXT: sw s10, 68(sp) +; ZDINX64-NEXT: sw s9, 64(sp) +; ZDINX64-NEXT: sw s8, 60(sp) +; ZDINX64-NEXT: sw s7, 56(sp) +; ZDINX64-NEXT: sw s6, 52(sp) +; ZDINX64-NEXT: sw s5, 48(sp) +; ZDINX64-NEXT: sw s4, 44(sp) +; ZDINX64-NEXT: sw s3, 40(sp) +; ZDINX64-NEXT: sw s2, 36(sp) +; ZDINX64-NEXT: sw s1, 32(sp) +; ZDINX64-NEXT: sw t6, 28(sp) +; ZDINX64-NEXT: sw t5, 24(sp) +; ZDINX64-NEXT: sw t4, 20(sp) +; ZDINX64-NEXT: sw t3, 16(sp) +; ZDINX64-NEXT: lw t3, 192(sp) +; ZDINX64-NEXT: lw t4, 200(sp) +; ZDINX64-NEXT: lw t5, 208(sp) +; ZDINX64-NEXT: lw t6, 216(sp) ; ZDINX64-NEXT: sw s0, 12(sp) -; ZDINX64-NEXT: sw t4, 8(sp) -; ZDINX64-NEXT: sw t5, 4(sp) -; ZDINX64-NEXT: sw t6, 0(sp) -; ZDINX64-NEXT: ld t3, 88(sp) # 
8-byte Folded Reload -; ZDINX64-NEXT: ld t4, 96(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld t5, 104(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld t6, 112(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: sw t2, 8(sp) +; ZDINX64-NEXT: sw t1, 4(sp) +; ZDINX64-NEXT: sw t0, 0(sp) ; ZDINX64-NEXT: call callee_float_32 -; ZDINX64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s2, 192(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s5, 168(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s8, 144(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s9, 136(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: addi sp, sp, 224 +; ZDINX64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s1, 168(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s2, 160(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s3, 152(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s4, 144(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s5, 136(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s6, 128(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s7, 120(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s8, 112(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s9, 104(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s10, 96(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s11, 88(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: addi sp, sp, 192 ; ZDINX64-NEXT: ret %C = call fastcc float @callee_float_32(<32 x float> %A) ret float %C diff --git a/llvm/test/CodeGen/RISCV/half-arith.ll b/llvm/test/CodeGen/RISCV/half-arith.ll index b033c75eeadd8..27829f2b65759 100644 --- a/llvm/test/CodeGen/RISCV/half-arith.ll +++ b/llvm/test/CodeGen/RISCV/half-arith.ll @@ -2877,14 +2877,13 @@ define half @fsgnjx_f16(half %x, half %y) nounwind { ; RV32IZFHMIN-LABEL: fsgnjx_f16: ; RV32IZFHMIN: # %bb.0: ; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI23_0) -; RV32IZFHMIN-NEXT: flh fa5, %lo(.LCPI23_0)(a0) -; RV32IZFHMIN-NEXT: fmv.x.h a0, fa0 -; RV32IZFHMIN-NEXT: lui a1, 1048568 -; RV32IZFHMIN-NEXT: and a0, a0, a1 -; RV32IZFHMIN-NEXT: fmv.x.h a1, fa5 -; RV32IZFHMIN-NEXT: slli a1, a1, 17 -; RV32IZFHMIN-NEXT: srli a1, a1, 17 -; RV32IZFHMIN-NEXT: or a0, a1, a0 +; RV32IZFHMIN-NEXT: lhu a0, %lo(.LCPI23_0)(a0) +; RV32IZFHMIN-NEXT: fmv.x.h a1, fa0 +; RV32IZFHMIN-NEXT: lui a2, 1048568 +; RV32IZFHMIN-NEXT: and a1, a1, a2 +; RV32IZFHMIN-NEXT: slli a0, a0, 17 +; RV32IZFHMIN-NEXT: srli a0, a0, 17 +; RV32IZFHMIN-NEXT: or a0, a0, a1 ; RV32IZFHMIN-NEXT: fmv.h.x fa5, a0 ; RV32IZFHMIN-NEXT: fcvt.s.h fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fa4, fa1 @@ -2895,14 +2894,13 @@ define half @fsgnjx_f16(half %x, half %y) nounwind { ; RV64IZFHMIN-LABEL: fsgnjx_f16: ; RV64IZFHMIN: # %bb.0: ; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI23_0) -; RV64IZFHMIN-NEXT: flh fa5, %lo(.LCPI23_0)(a0) -; RV64IZFHMIN-NEXT: fmv.x.h a0, fa0 -; RV64IZFHMIN-NEXT: lui a1, 1048568 -; RV64IZFHMIN-NEXT: and a0, a0, a1 -; RV64IZFHMIN-NEXT: fmv.x.h a1, fa5 -; RV64IZFHMIN-NEXT: slli a1, a1, 49 -; RV64IZFHMIN-NEXT: srli a1, a1, 49 -; RV64IZFHMIN-NEXT: or a0, a1, a0 +; RV64IZFHMIN-NEXT: lhu a0, %lo(.LCPI23_0)(a0) +; RV64IZFHMIN-NEXT: fmv.x.h a1, fa0 +; 
RV64IZFHMIN-NEXT: lui a2, 1048568 +; RV64IZFHMIN-NEXT: and a1, a1, a2 +; RV64IZFHMIN-NEXT: slli a0, a0, 49 +; RV64IZFHMIN-NEXT: srli a0, a0, 49 +; RV64IZFHMIN-NEXT: or a0, a0, a1 ; RV64IZFHMIN-NEXT: fmv.h.x fa5, a0 ; RV64IZFHMIN-NEXT: fcvt.s.h fa5, fa5 ; RV64IZFHMIN-NEXT: fcvt.s.h fa4, fa1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll index 170e71af09b49..727e03125176a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll @@ -40,8 +40,7 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) { ; RV32-ZFBFMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload ; RV32-ZFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV32-ZFBFMIN-NEXT: vse16.v v8, (a1) -; RV32-ZFBFMIN-NEXT: flh fa5, 0(a0) -; RV32-ZFBFMIN-NEXT: fmv.x.h a0, fa5 +; RV32-ZFBFMIN-NEXT: lh a0, 0(a0) ; RV32-ZFBFMIN-NEXT: vmv.v.x v8, a0 ; RV32-ZFBFMIN-NEXT: csrr a0, vlenb ; RV32-ZFBFMIN-NEXT: slli a0, a0, 1 @@ -71,8 +70,7 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) { ; RV64-ZFBFMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload ; RV64-ZFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV64-ZFBFMIN-NEXT: vse16.v v8, (a1) -; RV64-ZFBFMIN-NEXT: flh fa5, 0(a0) -; RV64-ZFBFMIN-NEXT: fmv.x.h a0, fa5 +; RV64-ZFBFMIN-NEXT: lh a0, 0(a0) ; RV64-ZFBFMIN-NEXT: vmv.v.x v8, a0 ; RV64-ZFBFMIN-NEXT: csrr a0, vlenb ; RV64-ZFBFMIN-NEXT: slli a0, a0, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index b5d3e2cd776f2..bf2eb3ff0261a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -220,8 +220,7 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) { ; RV32-ZFHMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload ; RV32-ZFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV32-ZFHMIN-NEXT: vse16.v v8, (a1) -; RV32-ZFHMIN-NEXT: flh fa5, 0(a0) -; RV32-ZFHMIN-NEXT: fmv.x.h a0, fa5 +; RV32-ZFHMIN-NEXT: lh a0, 0(a0) ; RV32-ZFHMIN-NEXT: vmv.v.x v8, a0 ; RV32-ZFHMIN-NEXT: csrr a0, vlenb ; RV32-ZFHMIN-NEXT: slli a0, a0, 1 @@ -251,8 +250,7 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) { ; RV64-ZFHMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload ; RV64-ZFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV64-ZFHMIN-NEXT: vse16.v v8, (a1) -; RV64-ZFHMIN-NEXT: flh fa5, 0(a0) -; RV64-ZFHMIN-NEXT: fmv.x.h a0, fa5 +; RV64-ZFHMIN-NEXT: lh a0, 0(a0) ; RV64-ZFHMIN-NEXT: vmv.v.x v8, a0 ; RV64-ZFHMIN-NEXT: csrr a0, vlenb ; RV64-ZFHMIN-NEXT: slli a0, a0, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index 5ab8eab091c2e..d665d23dec68a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -516,41 +516,33 @@ define void @fabs_v8f16(ptr %x) { ; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-RV32-NEXT: flh fa4, 0(sp) -; ZVFHMIN-RV32-NEXT: flh fa3, 4(sp) -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa4 -; ZVFHMIN-RV32-NEXT: lui a3, 8 -; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-RV32-NEXT: addi a3, a3, -1 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: 
vmv.v.x v8, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 10(sp) -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 +; ZVFHMIN-RV32-NEXT: lhu a1, 2(sp) +; ZVFHMIN-RV32-NEXT: lui a2, 8 +; ZVFHMIN-RV32-NEXT: lhu a3, 0(sp) +; ZVFHMIN-RV32-NEXT: addi a2, a2, -1 +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 +; ZVFHMIN-RV32-NEXT: lhu a4, 4(sp) +; ZVFHMIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a3 ; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 8(sp) +; ZVFHMIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-RV32-NEXT: lhu a1, 6(sp) ; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 14(sp) +; ZVFHMIN-RV32-NEXT: lhu a3, 10(sp) +; ZVFHMIN-RV32-NEXT: lhu a4, 8(sp) +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 +; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-RV32-NEXT: lhu a1, 12(sp) +; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a4 +; ZVFHMIN-RV32-NEXT: lhu a4, 14(sp) +; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 ; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 +; ZVFHMIN-RV32-NEXT: and a2, a4, a2 ; ZVFHMIN-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMIN-RV32-NEXT: addi sp, sp, 16 @@ -564,41 +556,33 @@ define void @fabs_v8f16(ptr %x) { ; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-RV64-NEXT: flh fa4, 0(sp) -; ZVFHMIN-RV64-NEXT: flh fa3, 4(sp) -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa4 -; ZVFHMIN-RV64-NEXT: lui a3, 8 -; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-RV64-NEXT: addiw a3, a3, -1 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 10(sp) -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 +; ZVFHMIN-RV64-NEXT: lhu a1, 2(sp) +; ZVFHMIN-RV64-NEXT: lui a2, 8 +; ZVFHMIN-RV64-NEXT: lhu a3, 0(sp) +; ZVFHMIN-RV64-NEXT: addiw a2, a2, -1 +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 +; ZVFHMIN-RV64-NEXT: lhu a4, 4(sp) +; ZVFHMIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a3 ; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 8(sp) +; ZVFHMIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-RV64-NEXT: lhu a1, 6(sp) ; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; 
ZVFHMIN-RV64-NEXT: flh fa5, 14(sp) +; ZVFHMIN-RV64-NEXT: lhu a3, 10(sp) +; ZVFHMIN-RV64-NEXT: lhu a4, 8(sp) +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 +; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-RV64-NEXT: lhu a1, 12(sp) +; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a4 +; ZVFHMIN-RV64-NEXT: lhu a4, 14(sp) +; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 ; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 +; ZVFHMIN-RV64-NEXT: and a2, a4, a2 ; ZVFHMIN-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-RV64-NEXT: vse16.v v9, (a0) ; ZVFHMIN-RV64-NEXT: addi sp, sp, 16 @@ -628,41 +612,33 @@ define void @fabs_v6f16(ptr %x) { ; ZVFHMIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-RV32-NEXT: flh fa4, 0(sp) -; ZVFHMIN-RV32-NEXT: flh fa3, 4(sp) -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa4 -; ZVFHMIN-RV32-NEXT: lui a3, 8 -; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-RV32-NEXT: addi a3, a3, -1 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 10(sp) -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 +; ZVFHMIN-RV32-NEXT: lhu a1, 2(sp) +; ZVFHMIN-RV32-NEXT: lui a2, 8 +; ZVFHMIN-RV32-NEXT: lhu a3, 0(sp) +; ZVFHMIN-RV32-NEXT: addi a2, a2, -1 +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 +; ZVFHMIN-RV32-NEXT: lhu a4, 4(sp) +; ZVFHMIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a3 ; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 8(sp) +; ZVFHMIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-RV32-NEXT: lhu a1, 6(sp) ; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 14(sp) +; ZVFHMIN-RV32-NEXT: lhu a3, 10(sp) +; ZVFHMIN-RV32-NEXT: lhu a4, 8(sp) +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 +; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-RV32-NEXT: lhu a1, 12(sp) +; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a4 +; ZVFHMIN-RV32-NEXT: lhu a4, 14(sp) +; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 ; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 +; ZVFHMIN-RV32-NEXT: and a2, a4, a2 ; ZVFHMIN-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu ; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) @@ -678,41 +654,33 @@ 
define void @fabs_v6f16(ptr %x) { ; ZVFHMIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-RV64-NEXT: flh fa4, 0(sp) -; ZVFHMIN-RV64-NEXT: flh fa3, 4(sp) -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa4 -; ZVFHMIN-RV64-NEXT: lui a3, 8 -; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-RV64-NEXT: addiw a3, a3, -1 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 10(sp) -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 +; ZVFHMIN-RV64-NEXT: lhu a1, 2(sp) +; ZVFHMIN-RV64-NEXT: lui a2, 8 +; ZVFHMIN-RV64-NEXT: lhu a3, 0(sp) +; ZVFHMIN-RV64-NEXT: addiw a2, a2, -1 +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 +; ZVFHMIN-RV64-NEXT: lhu a4, 4(sp) +; ZVFHMIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a3 ; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 8(sp) +; ZVFHMIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-RV64-NEXT: lhu a1, 6(sp) ; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 14(sp) +; ZVFHMIN-RV64-NEXT: lhu a3, 10(sp) +; ZVFHMIN-RV64-NEXT: lhu a4, 8(sp) +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 +; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-RV64-NEXT: lhu a1, 12(sp) +; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a4 +; ZVFHMIN-RV64-NEXT: lhu a4, 14(sp) +; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 ; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 +; ZVFHMIN-RV64-NEXT: and a2, a4, a2 ; ZVFHMIN-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu ; ZVFHMIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-RV64-NEXT: vse16.v v9, (a0) @@ -898,71 +866,55 @@ define void @copysign_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 18(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 20(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a2, 18(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui t1, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, t1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, 
a3, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a7 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, t1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 8 +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, a5, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 16(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a6, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 20(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 22(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 10(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, t0, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 24(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 8(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, t2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a6, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, t1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a5, 28(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 30(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a5 ; 
ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a6, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 @@ -982,71 +934,55 @@ define void @copysign_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 18(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 20(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a2, 18(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui t1, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, t1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a7 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, t1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 8 +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, a5, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 16(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a6, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 20(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 22(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 10(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, t0, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 24(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 8(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 
-; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, t2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a6, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, t1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a5, 28(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 30(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a6, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 @@ -1202,71 +1138,55 @@ define void @copysign_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 18(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 20(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a2, 18(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui t1, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, t1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a7 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, t1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 8 +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, a5, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 16(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a5 +; 
ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a6, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 20(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 22(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 10(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, t0, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 24(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 8(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, t2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a6, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, t1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a5, 28(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 30(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a6, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 @@ -1288,71 +1208,55 @@ define void @copysign_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 18(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; 
ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 20(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a2, 18(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui t1, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, t1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a7 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, t1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 8 +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, a5, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 16(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a6, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 20(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 22(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 10(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, t0, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 24(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 8(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, t2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a6, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, t1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: 
lh a5, 28(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 30(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a6, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 @@ -1521,50 +1425,42 @@ define void @copysign_vf_v8f16(ptr %x, half %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 0(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: addi a3, a3, -1 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 8(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 14(sp) ; 
ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t @@ -1580,50 +1476,42 @@ define void @copysign_vf_v8f16(ptr %x, half %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 0(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a3, a3, -1 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 8(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 14(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t @@ -1752,54 +1640,46 @@ define void @copysign_vf_v6f16(ptr %x, half %y) { ; 
ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 2(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a4, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 0(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: addi a4, a4, -1 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 10(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 8(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a1, 12(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu ; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0) @@ -1815,54 +1695,46 @@ define void @copysign_vf_v6f16(ptr %x, half %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568 ; 
ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 2(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a4, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 0(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a4, a4, -1 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 10(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 8(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a1, 12(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu ; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0) @@ -2051,77 +1923,61 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu ; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: mv a2, sp -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, sp, 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 18(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; 
ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 16(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, a1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and t1, a3, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, t1, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 8 +; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp +; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16 +; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, a3, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a5, 18(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 16(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a6 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 4(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a7, 20(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, t0, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, t2, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, t1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a7 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a7, 22(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 8(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 24(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, t0, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a1 -; 
ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a7, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 12(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 28(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 30(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t @@ -2136,77 +1992,61 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu ; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: mv a2, sp -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi a2, sp, 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 18(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 16(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 20(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, a1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and t1, a3, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, t1, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 8 +; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp +; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1) +; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16 +; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a1, a3, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a5, 18(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 16(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; 
ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a6 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 4(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a7, 20(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, t0, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, t2, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, t1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a7 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a7, 22(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 8(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 24(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a6, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, t0, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a7, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 12(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 28(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 30(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t @@ -2360,78 +2200,62 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: 
vsetivli zero, 6, e16, mf2, ta, ma ; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 8 +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 8 ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: mv a2, sp -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, sp, 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 18(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 16(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, a1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and t1, a3, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, t1, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp +; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16 +; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, a3, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a5, 18(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 16(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a6 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 4(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a7, 20(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, t0, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, t2, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, t1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a7 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a7, 22(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; 
ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 8(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 24(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, t0, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a7, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 12(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 28(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 30(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu @@ -2447,78 +2271,62 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma ; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 8 +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 8 ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: mv a2, sp -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi a2, sp, 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 18(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 16(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 20(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, a1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and t1, a3, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, t1, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp +; 
ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1) +; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16 +; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a1, a3, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a5, 18(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 16(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a6 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 4(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a7, 20(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, t0, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, t2, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, t1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a7 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a7, 22(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 8(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 24(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a6, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, t0, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a7, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 12(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 28(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 +; 
ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 30(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu @@ -2678,38 +2486,30 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, sp, 8 ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a5, a1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a6, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a7, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a3, a1, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 1048568 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 8(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a7, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 4(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a6, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0) @@ -2730,38 +2530,30 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: addi a2, sp, 8 ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4 
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a5, a1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a6, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a7, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a3, a1, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 1048568 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 8(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a7, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 4(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a4, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a6 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a6, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a0) @@ -2885,38 +2677,30 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, sp, 8 ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a5, a1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a6, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a7, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a3, a1, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 1048568 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 8(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, 
a7, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 4(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a6, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 3, e16, mf4, ta, ma @@ -2939,38 +2723,30 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: addi a2, sp, 8 ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a5, a1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a6, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a7, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a3, a1, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 1048568 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 8(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a7, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 4(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a4, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a6 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, 
a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a6, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 3, e16, mf4, ta, ma