Skip to content

Commit e777872

Browse files
committed
[X86] Lower mathlib call ldexp into scalef when avx512 is enabled #165694
1 parent bff2aa6 commit e777872

File tree

3 files changed

+365
-492
lines changed

3 files changed

+365
-492
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2590,6 +2590,26 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
25902590
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
25912591
}
25922592

2593+
if (Subtarget.hasAVX512()) {
2594+
for (MVT VT : { MVT::f32, MVT::f64, MVT::v16f32, MVT::v8f64})
2595+
setOperationAction(ISD::FLDEXP, VT, Custom);
2596+
2597+
if (Subtarget.hasVLX()) {
2598+
for (MVT VT : { MVT::v4f32, MVT::v2f64, MVT::v8f32, MVT::v4f64 })
2599+
setOperationAction(ISD::FLDEXP, VT, Custom);
2600+
2601+
if (Subtarget.hasFP16()) {
2602+
for (MVT VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16 })
2603+
setOperationAction(ISD::FLDEXP, VT, Custom);
2604+
}
2605+
}
2606+
2607+
if (Subtarget.hasFP16()) {
2608+
for (MVT VT : { MVT::f16, MVT::v32f16 })
2609+
setOperationAction(ISD::FLDEXP, VT, Custom);
2610+
}
2611+
}
2612+
25932613
// On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
25942614
// is. We should promote the value to 64-bits to solve this.
25952615
// This is what the CRT headers do - `fmodf` is an inline header
@@ -19142,6 +19162,58 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
1914219162
return SDValue();
1914319163
}
1914419164

19165+
/// Custom lowering for ISD::FLDEXP (operand 0 scaled by two raised to the
/// integer operand 1) onto the AVX-512 scale nodes.
///
/// Scalar inputs (f16/f32/f64) are placed in lane 0 of a 128-bit vector,
/// scaled with X86ISD::SCALEFS, and lane 0 is extracted again. An f16 input
/// is first extended to f32 when the subtarget has no FP16 support, and the
/// result is rounded back to f16 at the end. The listed f32/f64 vector types
/// map directly onto X86ISD::SCALEF. In every handled case the integer
/// exponent is converted to floating point with SINT_TO_FP first, because the
/// scale nodes take the exponent as a floating-point operand.
///
/// \param Op        The FLDEXP node: operand 0 is the value, operand 1 the
///                  integer exponent.
/// \param Subtarget Queried for FP16 availability.
/// \param DAG       The DAG in which the replacement nodes are created.
/// \returns The lowered value, or an empty SDValue for any value type that
///          is not handled here (presumably the legalizer then falls back to
///          its default handling -- confirm against the caller).
static SDValue LowerFLDEXP(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG) {
  SDLoc DL(Op);
  SDValue X = Op.getOperand(0);
  SDValue Exp = Op.getOperand(1);
  const MVT XTy = X.getSimpleValueType();

  // 128-bit vector type used to carry a scalar input through SCALEFS.
  MVT VecVT;

  switch (XTy.SimpleTy) {
  default:
    return SDValue();
  case MVT::f16:
    if (Subtarget.hasFP16()) {
      // The scalar scale node works on the low element of a 128-bit vector,
      // so the container is v8f16 whether or not VLX is available.
      VecVT = MVT::v8f16;
      break;
    }
    // No native half-precision arithmetic: compute in f32 and round back
    // to f16 once the scaling is done.
    X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X);
    [[fallthrough]];
  case MVT::f32:
    VecVT = MVT::v4f32;
    break;
  case MVT::f64:
    VecVT = MVT::v2f64;
    break;
  case MVT::v4f32:
  case MVT::v2f64:
  case MVT::v8f32:
  case MVT::v4f64:
  case MVT::v16f32:
  case MVT::v8f64:
    Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
    // SCALEF is a binary FP node: (value, exponent).
    return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp);
  }

  // Scalar path. ScalarVT differs from XTy only when an f16 input was
  // extended to f32 above.
  const EVT ScalarVT = X.getValueType();

  // Use the target's vector-index type rather than a hard-coded i64 so the
  // index constant is also a legal type on 32-bit targets.
  SDValue Idx0 = DAG.getVectorIdxConstant(0, DL);

  Exp = DAG.getNode(ISD::SINT_TO_FP, DL, ScalarVT, Exp);
  SDValue VecX = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT,
                             DAG.getUNDEF(VecVT), X, Idx0);
  SDValue VecExp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT,
                               DAG.getUNDEF(VecVT), Exp, Idx0);
  SDValue Scaled = DAG.getNode(X86ISD::SCALEFS, DL, VecVT, VecX, VecExp);
  SDValue Result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Scaled, Idx0);

  if (ScalarVT != XTy) {
    // The scaled f32 value may overflow or underflow the f16 range, so this
    // is an ordinary rounding: the FP_ROUND flag must be 0. A flag of 1 would
    // claim the value is unchanged by the round and license unsafe folds.
    Result = DAG.getNode(ISD::FP_ROUND, DL, XTy, Result,
                         DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
  }
  return Result;
}
19216+
1914519217
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget,
1914619218
SelectionDAG &DAG) {
1914719219
SDLoc dl(Op);
@@ -33672,6 +33744,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
3367233744
case ISD::ADDRSPACECAST: return LowerADDRSPACECAST(Op, DAG);
3367333745
case X86ISD::CVTPS2PH: return LowerCVTPS2PH(Op, DAG);
3367433746
case ISD::PREFETCH: return LowerPREFETCH(Op, Subtarget, DAG);
33747+
case ISD::FLDEXP: return LowerFLDEXP(Op, Subtarget, DAG);
3367533748
// clang-format on
3367633749
}
3367733750
}

llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll

Lines changed: 163 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -79,38 +79,64 @@ define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
7979
; CHECK-SSE-NEXT: .cfi_def_cfa_offset 8
8080
; CHECK-SSE-NEXT: retq
8181
;
82-
; CHECK-AVX-LABEL: fmul_pow2_ldexp_4xfloat:
83-
; CHECK-AVX: # %bb.0:
84-
; CHECK-AVX-NEXT: subq $40, %rsp
85-
; CHECK-AVX-NEXT: .cfi_def_cfa_offset 48
86-
; CHECK-AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
87-
; CHECK-AVX-NEXT: vextractps $1, %xmm0, %edi
88-
; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
89-
; CHECK-AVX-NEXT: callq ldexpf@PLT
90-
; CHECK-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
91-
; CHECK-AVX-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
92-
; CHECK-AVX-NEXT: vmovd %xmm0, %edi
93-
; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
94-
; CHECK-AVX-NEXT: callq ldexpf@PLT
95-
; CHECK-AVX-NEXT: vinsertps $16, (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
96-
; CHECK-AVX-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[2,3]
97-
; CHECK-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
98-
; CHECK-AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
99-
; CHECK-AVX-NEXT: vextractps $2, %xmm0, %edi
100-
; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
101-
; CHECK-AVX-NEXT: callq ldexpf@PLT
102-
; CHECK-AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
103-
; CHECK-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
104-
; CHECK-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
105-
; CHECK-AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
106-
; CHECK-AVX-NEXT: vextractps $3, %xmm0, %edi
107-
; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
108-
; CHECK-AVX-NEXT: callq ldexpf@PLT
109-
; CHECK-AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
110-
; CHECK-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
111-
; CHECK-AVX-NEXT: addq $40, %rsp
112-
; CHECK-AVX-NEXT: .cfi_def_cfa_offset 8
113-
; CHECK-AVX-NEXT: retq
82+
; CHECK-AVX2-LABEL: fmul_pow2_ldexp_4xfloat:
83+
; CHECK-AVX2: # %bb.0:
84+
; CHECK-AVX2-NEXT: subq $40, %rsp
85+
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 48
86+
; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
87+
; CHECK-AVX2-NEXT: vextractps $1, %xmm0, %edi
88+
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
89+
; CHECK-AVX2-NEXT: callq ldexpf@PLT
90+
; CHECK-AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
91+
; CHECK-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
92+
; CHECK-AVX2-NEXT: vmovd %xmm0, %edi
93+
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
94+
; CHECK-AVX2-NEXT: callq ldexpf@PLT
95+
; CHECK-AVX2-NEXT: vinsertps $16, (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
96+
; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[2,3]
97+
; CHECK-AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
98+
; CHECK-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
99+
; CHECK-AVX2-NEXT: vextractps $2, %xmm0, %edi
100+
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
101+
; CHECK-AVX2-NEXT: callq ldexpf@PLT
102+
; CHECK-AVX2-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
103+
; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
104+
; CHECK-AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
105+
; CHECK-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
106+
; CHECK-AVX2-NEXT: vextractps $3, %xmm0, %edi
107+
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
108+
; CHECK-AVX2-NEXT: callq ldexpf@PLT
109+
; CHECK-AVX2-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
110+
; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
111+
; CHECK-AVX2-NEXT: addq $40, %rsp
112+
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
113+
; CHECK-AVX2-NEXT: retq
114+
;
115+
; CHECK-ONLY-AVX512F-LABEL: fmul_pow2_ldexp_4xfloat:
116+
; CHECK-ONLY-AVX512F: # %bb.0:
117+
; CHECK-ONLY-AVX512F-NEXT: vcvtdq2ps %xmm0, %xmm1
118+
; CHECK-ONLY-AVX512F-NEXT: vmovss {{.*#+}} xmm2 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
119+
; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm1, %xmm2, %xmm1
120+
; CHECK-ONLY-AVX512F-NEXT: vshufps {{.*#+}} xmm3 = xmm0[1,1,1,1]
121+
; CHECK-ONLY-AVX512F-NEXT: vcvtdq2ps %xmm3, %xmm3
122+
; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm3, %xmm2, %xmm3
123+
; CHECK-ONLY-AVX512F-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
124+
; CHECK-ONLY-AVX512F-NEXT: vshufps {{.*#+}} xmm3 = xmm0[2,3,2,3]
125+
; CHECK-ONLY-AVX512F-NEXT: vcvtdq2ps %xmm3, %xmm3
126+
; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm3, %xmm2, %xmm3
127+
; CHECK-ONLY-AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
128+
; CHECK-ONLY-AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
129+
; CHECK-ONLY-AVX512F-NEXT: vcvtdq2ps %xmm0, %xmm0
130+
; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm0, %xmm2, %xmm0
131+
; CHECK-ONLY-AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
132+
; CHECK-ONLY-AVX512F-NEXT: retq
133+
;
134+
; CHECK-SKX-LABEL: fmul_pow2_ldexp_4xfloat:
135+
; CHECK-SKX: # %bb.0:
136+
; CHECK-SKX-NEXT: vcvtdq2ps %xmm0, %xmm0
137+
; CHECK-SKX-NEXT: vbroadcastss {{.*#+}} xmm1 = [9.0E+0,9.0E+0,9.0E+0,9.0E+0]
138+
; CHECK-SKX-NEXT: vscalefps %xmm0, %xmm1, %xmm0
139+
; CHECK-SKX-NEXT: retq
114140
%r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, <4 x i32> %i)
115141
ret <4 x float> %r
116142
}
@@ -560,82 +586,109 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
560586
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
561587
; CHECK-AVX2-NEXT: retq
562588
;
563-
; CHECK-AVX512F-LABEL: fmul_pow2_ldexp_8xhalf:
564-
; CHECK-AVX512F: # %bb.0:
565-
; CHECK-AVX512F-NEXT: subq $72, %rsp
566-
; CHECK-AVX512F-NEXT: .cfi_def_cfa_offset 80
567-
; CHECK-AVX512F-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
568-
; CHECK-AVX512F-NEXT: vpextrw $7, %xmm0, %eax
569-
; CHECK-AVX512F-NEXT: movswl %ax, %edi
570-
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
571-
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
572-
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
573-
; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
574-
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
575-
; CHECK-AVX512F-NEXT: vpextrw $6, %xmm0, %eax
576-
; CHECK-AVX512F-NEXT: movswl %ax, %edi
577-
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
578-
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
579-
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
580-
; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
581-
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
582-
; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
583-
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
584-
; CHECK-AVX512F-NEXT: vpextrw $5, %xmm0, %eax
585-
; CHECK-AVX512F-NEXT: movswl %ax, %edi
586-
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
587-
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
588-
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
589-
; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
590-
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
591-
; CHECK-AVX512F-NEXT: vpextrw $4, %xmm0, %eax
592-
; CHECK-AVX512F-NEXT: movswl %ax, %edi
593-
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
594-
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
595-
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
596-
; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
597-
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
598-
; CHECK-AVX512F-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
599-
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
600-
; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
601-
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
602-
; CHECK-AVX512F-NEXT: vpextrw $3, %xmm0, %eax
603-
; CHECK-AVX512F-NEXT: movswl %ax, %edi
604-
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
605-
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
606-
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
607-
; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
608-
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
609-
; CHECK-AVX512F-NEXT: vpextrw $2, %xmm0, %eax
610-
; CHECK-AVX512F-NEXT: movswl %ax, %edi
611-
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
612-
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
613-
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
614-
; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
615-
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
616-
; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
617-
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
618-
; CHECK-AVX512F-NEXT: vpextrw $1, %xmm0, %eax
619-
; CHECK-AVX512F-NEXT: movswl %ax, %edi
620-
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
621-
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
622-
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
623-
; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
624-
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
625-
; CHECK-AVX512F-NEXT: vmovd %xmm0, %eax
626-
; CHECK-AVX512F-NEXT: movswl %ax, %edi
627-
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
628-
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
629-
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
630-
; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
631-
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
632-
; CHECK-AVX512F-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
633-
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
634-
; CHECK-AVX512F-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
635-
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0]
636-
; CHECK-AVX512F-NEXT: addq $72, %rsp
637-
; CHECK-AVX512F-NEXT: .cfi_def_cfa_offset 8
638-
; CHECK-AVX512F-NEXT: retq
589+
; CHECK-ONLY-AVX512F-LABEL: fmul_pow2_ldexp_8xhalf:
590+
; CHECK-ONLY-AVX512F: # %bb.0:
591+
; CHECK-ONLY-AVX512F-NEXT: vpextrw $7, %xmm0, %eax
592+
; CHECK-ONLY-AVX512F-NEXT: cwtl
593+
; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm2
594+
; CHECK-ONLY-AVX512F-NEXT: vmovss {{.*#+}} xmm1 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
595+
; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm2, %xmm1, %xmm2
596+
; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm2, %xmm2
597+
; CHECK-ONLY-AVX512F-NEXT: vpextrw $6, %xmm0, %eax
598+
; CHECK-ONLY-AVX512F-NEXT: cwtl
599+
; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
600+
; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm3, %xmm1, %xmm3
601+
; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm3, %xmm3
602+
; CHECK-ONLY-AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
603+
; CHECK-ONLY-AVX512F-NEXT: vpextrw $5, %xmm0, %eax
604+
; CHECK-ONLY-AVX512F-NEXT: cwtl
605+
; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
606+
; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm3, %xmm1, %xmm3
607+
; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm3, %xmm3
608+
; CHECK-ONLY-AVX512F-NEXT: vpextrw $4, %xmm0, %eax
609+
; CHECK-ONLY-AVX512F-NEXT: cwtl
610+
; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
611+
; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm4, %xmm1, %xmm4
612+
; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm4, %xmm4
613+
; CHECK-ONLY-AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
614+
; CHECK-ONLY-AVX512F-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
615+
; CHECK-ONLY-AVX512F-NEXT: vpextrw $3, %xmm0, %eax
616+
; CHECK-ONLY-AVX512F-NEXT: cwtl
617+
; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
618+
; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm3, %xmm1, %xmm3
619+
; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm3, %xmm3
620+
; CHECK-ONLY-AVX512F-NEXT: vpextrw $2, %xmm0, %eax
621+
; CHECK-ONLY-AVX512F-NEXT: cwtl
622+
; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
623+
; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm4, %xmm1, %xmm4
624+
; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm4, %xmm4
625+
; CHECK-ONLY-AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
626+
; CHECK-ONLY-AVX512F-NEXT: vpextrw $1, %xmm0, %eax
627+
; CHECK-ONLY-AVX512F-NEXT: cwtl
628+
; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
629+
; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm4, %xmm1, %xmm4
630+
; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm4, %xmm4
631+
; CHECK-ONLY-AVX512F-NEXT: vmovd %xmm0, %eax
632+
; CHECK-ONLY-AVX512F-NEXT: cwtl
633+
; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
634+
; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm0, %xmm1, %xmm0
635+
; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
636+
; CHECK-ONLY-AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
637+
; CHECK-ONLY-AVX512F-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
638+
; CHECK-ONLY-AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
639+
; CHECK-ONLY-AVX512F-NEXT: retq
640+
;
641+
; CHECK-SKX-LABEL: fmul_pow2_ldexp_8xhalf:
642+
; CHECK-SKX: # %bb.0:
643+
; CHECK-SKX-NEXT: vpextrw $7, %xmm0, %eax
644+
; CHECK-SKX-NEXT: cwtl
645+
; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm1
646+
; CHECK-SKX-NEXT: vmovss {{.*#+}} xmm2 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
647+
; CHECK-SKX-NEXT: vscalefss %xmm1, %xmm2, %xmm1
648+
; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm1, %xmm1
649+
; CHECK-SKX-NEXT: vpextrw $6, %xmm0, %eax
650+
; CHECK-SKX-NEXT: cwtl
651+
; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
652+
; CHECK-SKX-NEXT: vscalefss %xmm3, %xmm2, %xmm3
653+
; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm3, %xmm3
654+
; CHECK-SKX-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
655+
; CHECK-SKX-NEXT: vpextrw $5, %xmm0, %eax
656+
; CHECK-SKX-NEXT: cwtl
657+
; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
658+
; CHECK-SKX-NEXT: vscalefss %xmm3, %xmm2, %xmm3
659+
; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm3, %xmm3
660+
; CHECK-SKX-NEXT: vpextrw $4, %xmm0, %eax
661+
; CHECK-SKX-NEXT: cwtl
662+
; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
663+
; CHECK-SKX-NEXT: vscalefss %xmm4, %xmm2, %xmm4
664+
; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm4, %xmm4
665+
; CHECK-SKX-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
666+
; CHECK-SKX-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
667+
; CHECK-SKX-NEXT: vpextrw $3, %xmm0, %eax
668+
; CHECK-SKX-NEXT: cwtl
669+
; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
670+
; CHECK-SKX-NEXT: vscalefss %xmm3, %xmm2, %xmm3
671+
; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm3, %xmm3
672+
; CHECK-SKX-NEXT: vpextrw $2, %xmm0, %eax
673+
; CHECK-SKX-NEXT: cwtl
674+
; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
675+
; CHECK-SKX-NEXT: vscalefss %xmm4, %xmm2, %xmm4
676+
; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm4, %xmm4
677+
; CHECK-SKX-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
678+
; CHECK-SKX-NEXT: vpextrw $1, %xmm0, %eax
679+
; CHECK-SKX-NEXT: cwtl
680+
; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
681+
; CHECK-SKX-NEXT: vscalefss %xmm4, %xmm2, %xmm4
682+
; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm4, %xmm4
683+
; CHECK-SKX-NEXT: vmovd %xmm0, %eax
684+
; CHECK-SKX-NEXT: cwtl
685+
; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
686+
; CHECK-SKX-NEXT: vscalefss %xmm0, %xmm2, %xmm0
687+
; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
688+
; CHECK-SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
689+
; CHECK-SKX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
690+
; CHECK-SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
691+
; CHECK-SKX-NEXT: retq
639692
%r = call <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half> <half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000>, <8 x i16> %i)
640693
ret <8 x half> %r
641694
}
@@ -1769,3 +1822,5 @@ define x86_fp80 @pr128528(i1 %cond) {
17691822
%mul = fmul x86_fp80 %conv, 0xK4007D055555555555800
17701823
ret x86_fp80 %mul
17711824
}
1825+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1826+
; CHECK-AVX512F: {{.*}}

0 commit comments

Comments
 (0)