Commit fc9e874

Widen 128/256 bit vector types when AVX512VL is not available.

1 parent e777872 commit fc9e874

3 files changed: +129 −152 lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 66 additions & 57 deletions
@@ -1828,6 +1828,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::FCOPYSIGN, VT, Custom);
       setOperationAction(ISD::FCANONICALIZE, VT, Custom);
     }
+
+    for (MVT VT : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f32,
+                   MVT::v4f64, MVT::v16f32, MVT::v8f64})
+      setOperationAction(ISD::FLDEXP, VT, Custom);
+
+    if (Subtarget.hasFP16()) {
+      for (MVT VT : {MVT::f16, MVT::v8f16, MVT::v16f16, MVT::v32f16})
+        setOperationAction(ISD::FLDEXP, VT, Custom);
+    }
+
     setOperationAction(ISD::LRINT, MVT::v16f32,
                        Subtarget.hasDQI() ? Legal : Custom);
     setOperationAction(ISD::LRINT, MVT::v8f64,
@@ -2590,26 +2600,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
   }

-  if (Subtarget.hasAVX512()) {
-    for (MVT VT : { MVT::f32, MVT::f64, MVT::v16f32, MVT::v8f64})
-      setOperationAction(ISD::FLDEXP, VT, Custom);
-
-    if (Subtarget.hasVLX()) {
-      for (MVT VT : { MVT::v4f32, MVT::v2f64, MVT::v8f32, MVT::v4f64 })
-        setOperationAction(ISD::FLDEXP, VT, Custom);
-
-      if (Subtarget.hasFP16()) {
-        for (MVT VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16 })
-          setOperationAction(ISD::FLDEXP, VT, Custom);
-      }
-    }
-
-    if (Subtarget.hasFP16()) {
-      for (MVT VT : { MVT::f16, MVT::v32f16 })
-        setOperationAction(ISD::FLDEXP, VT, Custom);
-    }
-  }
-
   // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
   // is. We should promote the value to 64-bits to solve this.
   // This is what the CRT headers do - `fmodf` is an inline header
@@ -19170,48 +19160,67 @@ static SDValue LowerFLDEXP(SDValue Op, const X86Subtarget &Subtarget,
   SDValue Exp = Op.getOperand(1);
   MVT XVT, ExpVT;

-  switch (XTy.SimpleTy) {
-  default:
-    return SDValue();
-  case MVT::f16:
-    if (Subtarget.hasFP16()) {
-      XVT = Subtarget.hasVLX() ? MVT::v8f16 : MVT::v32f16;
-      ExpVT = XVT;
-      break;
-    }
-    X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X);
-    [[fallthrough]];
-  case MVT::f32:
-    XVT = MVT::v4f32;
-    ExpVT = MVT::v4f32;
+  switch (XTy.SimpleTy) {
+  default:
+    return SDValue();
+  case MVT::f16:
+    if (Subtarget.hasFP16()) {
+      XVT = MVT::v8f16;
+      ExpVT = XVT;
+      break;
+    }
+    X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X);
+    [[fallthrough]];
+  case MVT::f32:
+    XVT = MVT::v4f32;
+    ExpVT = MVT::v4f32;
+    break;
+  case MVT::f64:
+    XVT = MVT::v2f64;
+    ExpVT = MVT::v2f64;
+    break;
+  case MVT::v4f32:
+  case MVT::v2f64:
+    if (!Subtarget.hasVLX()) {
+      XVT = XTy == MVT::v4f32 ? MVT::v16f32 : MVT::v8f64;
+      ExpVT = XVT;
       break;
-  case MVT::f64:
-    XVT = MVT::v2f64;
-    ExpVT = MVT::v2f64;
+    }
+    [[fallthrough]];
+  case MVT::v8f32:
+  case MVT::v4f64:
+    if (!Subtarget.hasVLX()) {
+      XVT = XTy == MVT::v8f32 ? MVT::v16f32 : MVT::v8f64;
+      ExpVT = XVT;
       break;
-  case MVT::v4f32:
-  case MVT::v2f64:
-  case MVT::v8f32:
-  case MVT::v4f64:
-  case MVT::v16f32:
-  case MVT::v8f64:
-    Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
-    return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp, X);
+    }
+    [[fallthrough]];
+  case MVT::v16f32:
+  case MVT::v8f64:
+    Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
+    return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp, X);
   }

-  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
   Exp = DAG.getNode(ISD::SINT_TO_FP, DL, X.getValueType(), Exp);
-  SDValue VX =
-      DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, XVT, DAG.getUNDEF(XVT), X, Zero);
-  SDValue VExp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ExpVT,
-                             DAG.getUNDEF(ExpVT), Exp, Zero);
-  SDValue Scalef = DAG.getNode(X86ISD::SCALEFS, DL, XVT, VX, VExp, VX);
-  SDValue Final =
-      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, X.getValueType(), Scalef, Zero);
-  if (X.getValueType() != XTy)
-    Final = DAG.getNode(ISD::FP_ROUND, DL, XTy, Final,
-                        DAG.getIntPtrConstant(1, SDLoc(Op)));
-  return Final;
+  if (XTy.isVector()) {
+    SDValue WideX =
+        DAG.getInsertSubvector(DL, DAG.getUNDEF(XVT), X, 0);
+    SDValue WideExp =
+        DAG.getInsertSubvector(DL, DAG.getUNDEF(ExpVT), Exp, 0);
+    SDValue Scalef =
+        DAG.getNode(X86ISD::SCALEF, DL, XVT, WideX, WideExp, WideX);
+    SDValue Final = DAG.getExtractSubvector(DL, XTy, Scalef, 0);
+    return Final;
+  } else {
+    SDValue VX = DAG.getInsertVectorElt(DL, DAG.getUNDEF(XVT), X, 0);
+    SDValue VExp = DAG.getInsertVectorElt(DL, DAG.getUNDEF(ExpVT), Exp, 0);
+    SDValue Scalefs = DAG.getNode(X86ISD::SCALEFS, DL, XVT, VX, VExp, VX);
+    SDValue Final = DAG.getExtractVectorElt(DL, X.getValueType(), Scalefs, 0);
+    if (X.getValueType() != XTy)
+      Final = DAG.getNode(ISD::FP_ROUND, DL, XTy, Final,
+                          DAG.getIntPtrConstant(1, SDLoc(Op)));
+    return Final;
+  }
 }

 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget,
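
The core of the change is the pair of `!Subtarget.hasVLX()` cases above: when only AVX512F is available, a 128- or 256-bit ldexp operand is inserted into an undef 512-bit vector, scaled with the 512-bit X86ISD::SCALEF node (baseline AVX512F only has vscalefps/vscalefpd at 512-bit width), and the original subvector is extracted back out. Below is a minimal sketch of the same widening trick at the C++ intrinsics level; it assumes an AVX512F-only target, and the function name ldexp_v4f32_widened is illustrative, not part of the patch.

    #include <immintrin.h>

    // ldexp on 4 floats without AVX512VL: widen both 128-bit inputs to 512
    // bits (the upper lanes stay undefined, mirroring the DAG-level insert
    // into UNDEF), apply the 512-bit VSCALEFPS that baseline AVX512F
    // provides, then keep only the low 128 bits of the result.
    static __m128 ldexp_v4f32_widened(__m128 x, __m128i exp) {
      __m128 expf = _mm_cvtepi32_ps(exp);          // ISD::SINT_TO_FP (vcvtdq2ps)
      __m512 wx = _mm512_castps128_ps512(x);       // reinterpret; upper lanes undefined
      __m512 wexp = _mm512_castps128_ps512(expf);
      __m512 scaled = _mm512_scalef_ps(wx, wexp);  // vscalefps %zmm, %zmm, %zmm
      return _mm512_castps512_ps128(scaled);       // extract the low subvector
    }

Built with -mavx512f alone, this has the same widen/scale/extract shape as the vscalefps %zmm1, %zmm0, %zmm0 sequences in the updated tests below.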

llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll

Lines changed: 4 additions & 14 deletions
@@ -114,21 +114,11 @@ define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
 ;
 ; CHECK-ONLY-AVX512F-LABEL: fmul_pow2_ldexp_4xfloat:
 ; CHECK-ONLY-AVX512F:       # %bb.0:
-; CHECK-ONLY-AVX512F-NEXT:    vcvtdq2ps %xmm0, %xmm1
-; CHECK-ONLY-AVX512F-NEXT:    vmovss {{.*#+}} xmm2 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-ONLY-AVX512F-NEXT:    vscalefss %xmm1, %xmm2, %xmm1
-; CHECK-ONLY-AVX512F-NEXT:    vshufps {{.*#+}} xmm3 = xmm0[1,1,1,1]
-; CHECK-ONLY-AVX512F-NEXT:    vcvtdq2ps %xmm3, %xmm3
-; CHECK-ONLY-AVX512F-NEXT:    vscalefss %xmm3, %xmm2, %xmm3
-; CHECK-ONLY-AVX512F-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; CHECK-ONLY-AVX512F-NEXT:    vshufps {{.*#+}} xmm3 = xmm0[2,3,2,3]
-; CHECK-ONLY-AVX512F-NEXT:    vcvtdq2ps %xmm3, %xmm3
-; CHECK-ONLY-AVX512F-NEXT:    vscalefss %xmm3, %xmm2, %xmm3
-; CHECK-ONLY-AVX512F-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
-; CHECK-ONLY-AVX512F-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; CHECK-ONLY-AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm1 = [9.0E+0,9.0E+0,9.0E+0,9.0E+0]
 ; CHECK-ONLY-AVX512F-NEXT:    vcvtdq2ps %xmm0, %xmm0
-; CHECK-ONLY-AVX512F-NEXT:    vscalefss %xmm0, %xmm2, %xmm0
-; CHECK-ONLY-AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-ONLY-AVX512F-NEXT:    vscalefps %zmm0, %zmm1, %zmm0
+; CHECK-ONLY-AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-ONLY-AVX512F-NEXT:    vzeroupper
 ; CHECK-ONLY-AVX512F-NEXT:    retq
 ;
 ; CHECK-SKX-LABEL: fmul_pow2_ldexp_4xfloat:
llvm/test/CodeGen/X86/ldexp-avx512.ll

Lines changed: 59 additions & 81 deletions
@@ -1,21 +1,37 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512fp16 | FileCheck %s --check-prefixes=CHECK,AVX512VL
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512fp16 | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512FP16
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VLF
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512fp16 | FileCheck %s --check-prefixes=CHECK,AVX512VLFP16

 define half @test_half(half %x, i32 %exp) nounwind {
-; AVX512-LABEL: test_half:
-; AVX512:       # %bb.0: # %entry
-; AVX512-NEXT:    vcvtsi2ss %edi, %xmm15, %xmm1
-; AVX512-NEXT:    vcvtph2ps %xmm0, %xmm0
-; AVX512-NEXT:    vscalefss %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: test_half:
+; AVX512F:       # %bb.0: # %entry
+; AVX512F-NEXT:    vcvtsi2ss %edi, %xmm15, %xmm1
+; AVX512F-NEXT:    vcvtph2ps %xmm0, %xmm0
+; AVX512F-NEXT:    vscalefss %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512FP16-LABEL: test_half:
+; AVX512FP16:       # %bb.0: # %entry
+; AVX512FP16-NEXT:    vcvtsi2sh %edi, %xmm31, %xmm1
+; AVX512FP16-NEXT:    vscalefsh %xmm1, %xmm0, %xmm0
+; AVX512FP16-NEXT:    retq
 ;
 ; AVX512VL-LABEL: test_half:
 ; AVX512VL:       # %bb.0: # %entry
-; AVX512VL-NEXT:    vcvtsi2sh %edi, %xmm31, %xmm1
-; AVX512VL-NEXT:    vscalefsh %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vcvtsi2ss %edi, %xmm15, %xmm1
+; AVX512VL-NEXT:    vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT:    vscalefss %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
+;
+; AVX512VLFP16-LABEL: test_half:
+; AVX512VLFP16:       # %bb.0: # %entry
+; AVX512VLFP16-NEXT:    vcvtsi2sh %edi, %xmm31, %xmm1
+; AVX512VLFP16-NEXT:    vscalefsh %xmm1, %xmm0, %xmm0
+; AVX512VLFP16-NEXT:    retq
 entry:
   %r = tail call fast half @llvm.ldexp.f16.i32(half %x, i32 %exp)
   ret half %r
@@ -59,30 +75,24 @@ declare fp128 @ldexpl(fp128, i32) memory(none)
 define <4 x float> @test_ldexp_4xfloat(<4 x float> %x, <4 x i32> %exp) nounwind {
 ; AVX512-LABEL: test_ldexp_4xfloat:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcvtdq2ps %xmm1, %xmm2
-; AVX512-NEXT:    vscalefss %xmm2, %xmm0, %xmm2
-; AVX512-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm4 = xmm1[1,1,1,1]
-; AVX512-NEXT:    vcvtdq2ps %xmm4, %xmm4
-; AVX512-NEXT:    vscalefss %xmm4, %xmm3, %xmm3
-; AVX512-NEXT:    vunpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; AVX512-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm4 = xmm1[2,3,2,3]
-; AVX512-NEXT:    vcvtdq2ps %xmm4, %xmm4
-; AVX512-NEXT:    vscalefss %xmm4, %xmm3, %xmm3
-; AVX512-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
+; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512-NEXT:    vcvtdq2ps %xmm1, %xmm1
-; AVX512-NEXT:    vscalefss %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
+; AVX512-NEXT:    vscalefps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
 ;
 ; AVX512VL-LABEL: test_ldexp_4xfloat:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vcvtdq2ps %xmm1, %xmm1
 ; AVX512VL-NEXT:    vscalefps %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
+;
+; AVX512VLFP16-LABEL: test_ldexp_4xfloat:
+; AVX512VLFP16:       # %bb.0:
+; AVX512VLFP16-NEXT:    vcvtdq2ps %xmm1, %xmm1
+; AVX512VLFP16-NEXT:    vscalefps %xmm1, %xmm0, %xmm0
+; AVX512VLFP16-NEXT:    retq
   %r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> %x, <4 x i32> %exp)
   ret <4 x float> %r
 }
@@ -107,50 +117,23 @@ declare <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double>, <2 x i32>)
 define <8 x float> @test_ldexp_8xfloat(<8 x float> %x, <8 x i32> %exp) nounwind {
 ; AVX512-LABEL: test_ldexp_8xfloat:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX512-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX512-NEXT:    vcvtdq2ps %xmm3, %xmm4
-; AVX512-NEXT:    vscalefss %xmm4, %xmm2, %xmm4
-; AVX512-NEXT:    vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm6 = xmm3[1,1,1,1]
-; AVX512-NEXT:    vcvtdq2ps %xmm6, %xmm6
-; AVX512-NEXT:    vscalefss %xmm6, %xmm5, %xmm5
-; AVX512-NEXT:    vunpcklps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
-; AVX512-NEXT:    vshufpd {{.*#+}} xmm5 = xmm2[1,0]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm6 = xmm3[2,3,2,3]
-; AVX512-NEXT:    vcvtdq2ps %xmm6, %xmm6
-; AVX512-NEXT:    vscalefss %xmm6, %xmm5, %xmm5
-; AVX512-NEXT:    vmovlhps {{.*#+}} xmm4 = xmm4[0],xmm5[0]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm2 = xmm2[3,3,3,3]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm3 = xmm3[3,3,3,3]
-; AVX512-NEXT:    vcvtdq2ps %xmm3, %xmm3
-; AVX512-NEXT:    vscalefss %xmm3, %xmm2, %xmm2
-; AVX512-NEXT:    vinsertps {{.*#+}} xmm2 = xmm4[0,1,2],xmm2[0]
-; AVX512-NEXT:    vcvtdq2ps %xmm1, %xmm3
-; AVX512-NEXT:    vscalefss %xmm3, %xmm0, %xmm3
-; AVX512-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm5 = xmm1[1,1,1,1]
-; AVX512-NEXT:    vcvtdq2ps %xmm5, %xmm5
-; AVX512-NEXT:    vscalefss %xmm5, %xmm4, %xmm4
-; AVX512-NEXT:    vunpcklps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
-; AVX512-NEXT:    vshufpd {{.*#+}} xmm4 = xmm0[1,0]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm5 = xmm1[2,3,2,3]
-; AVX512-NEXT:    vcvtdq2ps %xmm5, %xmm5
-; AVX512-NEXT:    vscalefss %xmm5, %xmm4, %xmm4
-; AVX512-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
-; AVX512-NEXT:    vcvtdq2ps %xmm1, %xmm1
-; AVX512-NEXT:    vscalefss %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
-; AVX512-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT:    vcvtdq2ps %ymm1, %ymm1
+; AVX512-NEXT:    vscalefps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512-NEXT:    retq
 ;
 ; AVX512VL-LABEL: test_ldexp_8xfloat:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vcvtdq2ps %ymm1, %ymm1
 ; AVX512VL-NEXT:    vscalefps %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
+;
+; AVX512VLFP16-LABEL: test_ldexp_8xfloat:
+; AVX512VLFP16:       # %bb.0:
+; AVX512VLFP16-NEXT:    vcvtdq2ps %ymm1, %ymm1
+; AVX512VLFP16-NEXT:    vscalefps %ymm1, %ymm0, %ymm0
+; AVX512VLFP16-NEXT:    retq
   %r = call <8 x float> @llvm.ldexp.v8f32.v8i32(<8 x float> %x, <8 x i32> %exp)
   ret <8 x float> %r
 }
@@ -159,30 +142,23 @@ declare <8 x float> @llvm.ldexp.v8f32.v8i32(<8 x float>, <8 x i32>)
 define <4 x double> @test_ldexp_4xdouble(<4 x double> %x, <4 x i32> %exp) nounwind {
 ; AVX512-LABEL: test_ldexp_4xdouble:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX512-NEXT:    vshufps {{.*#+}} xmm3 = xmm1[2,3,2,3]
-; AVX512-NEXT:    vcvtdq2pd %xmm3, %xmm3
-; AVX512-NEXT:    vscalefsd %xmm3, %xmm2, %xmm3
-; AVX512-NEXT:    vcvtdq2pd %xmm1, %xmm4
-; AVX512-NEXT:    vscalefsd %xmm4, %xmm0, %xmm4
-; AVX512-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
-; AVX512-NEXT:    vshufps {{.*#+}} xmm4 = xmm1[3,3,3,3]
-; AVX512-NEXT:    vcvtdq2pd %xmm4, %xmm4
-; AVX512-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
-; AVX512-NEXT:    vscalefsd %xmm4, %xmm2, %xmm2
-; AVX512-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; AVX512-NEXT:    vcvtdq2pd %xmm1, %xmm1
-; AVX512-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512-NEXT:    vscalefsd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX512-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm3[0],ymm0[0],ymm3[2],ymm0[2]
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT:    vcvtdq2pd %xmm1, %ymm1
+; AVX512-NEXT:    vscalefpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512-NEXT:    retq
 ;
 ; AVX512VL-LABEL: test_ldexp_4xdouble:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vcvtdq2pd %xmm1, %ymm1
 ; AVX512VL-NEXT:    vscalefpd %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
+;
+; AVX512VLFP16-LABEL: test_ldexp_4xdouble:
+; AVX512VLFP16:       # %bb.0:
+; AVX512VLFP16-NEXT:    vcvtdq2pd %xmm1, %ymm1
+; AVX512VLFP16-NEXT:    vscalefpd %ymm1, %ymm0, %ymm0
+; AVX512VLFP16-NEXT:    retq
   %r = call <4 x double> @llvm.ldexp.v4f64.v4i32(<4 x double> %x, <4 x i32> %exp)
   ret <4 x double> %r
 }
@@ -210,3 +186,5 @@ define <8 x double> @test_ldexp_8xdouble(<8 x double> %x, <8 x i32> %exp) nounwind {
 }
 declare <8 x double> @llvm.ldexp.v8f64.v8i32(<8 x double>, <8 x i32>)

+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX512VLF: {{.*}}
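
Scalar types still go through the one-element X86ISD::SCALEFS path in LowerFLDEXP (the else branch in the first file): the value and the converted exponent are placed in lane 0 of two vectors, scaled, and lane 0 is read back, with f16 additionally bouncing through f32 when AVX512FP16 is unavailable, exactly as the AVX512F and AVX512VL check lines for test_half show. A rough intrinsics-level equivalent of the f32 case, again only a sketch assuming AVX512F; the name ldexpf_scalefss is hypothetical.

    #include <immintrin.h>

    // Scalar float ldexp via VSCALEFSS: put x and the converted exponent in
    // lane 0 of two XMM registers, apply the scalar scalef, read lane 0 back.
    static float ldexpf_scalefss(float x, int exp) {
      __m128 vx = _mm_set_ss(x);
      __m128 vexp = _mm_set_ss((float)exp);  // ISD::SINT_TO_FP on the exponent
      __m128 r = _mm_scalef_ss(vx, vexp);    // vscalefss %xmm1, %xmm0, %xmm0
      return _mm_cvtss_f32(r);
    }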
