@@ -1,21 +1,37 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512fp16 | FileCheck %s --check-prefixes=CHECK,AVX512VL
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512fp16 | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512FP16
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VLF
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512fp16 | FileCheck %s --check-prefixes=CHECK,AVX512VLFP16
 
 define half @test_half(half %x, i32 %exp) nounwind {
-; AVX512-LABEL: test_half:
-; AVX512:       # %bb.0: # %entry
-; AVX512-NEXT:    vcvtsi2ss %edi, %xmm15, %xmm1
-; AVX512-NEXT:    vcvtph2ps %xmm0, %xmm0
-; AVX512-NEXT:    vscalefss %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: test_half:
+; AVX512F:       # %bb.0: # %entry
+; AVX512F-NEXT:    vcvtsi2ss %edi, %xmm15, %xmm1
+; AVX512F-NEXT:    vcvtph2ps %xmm0, %xmm0
+; AVX512F-NEXT:    vscalefss %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512FP16-LABEL: test_half:
+; AVX512FP16:       # %bb.0: # %entry
+; AVX512FP16-NEXT:    vcvtsi2sh %edi, %xmm31, %xmm1
+; AVX512FP16-NEXT:    vscalefsh %xmm1, %xmm0, %xmm0
+; AVX512FP16-NEXT:    retq
 ;
 ; AVX512VL-LABEL: test_half:
 ; AVX512VL:       # %bb.0: # %entry
-; AVX512VL-NEXT:    vcvtsi2sh %edi, %xmm31, %xmm1
-; AVX512VL-NEXT:    vscalefsh %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vcvtsi2ss %edi, %xmm15, %xmm1
+; AVX512VL-NEXT:    vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT:    vscalefss %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
+;
+; AVX512VLFP16-LABEL: test_half:
+; AVX512VLFP16:       # %bb.0: # %entry
+; AVX512VLFP16-NEXT:    vcvtsi2sh %edi, %xmm31, %xmm1
+; AVX512VLFP16-NEXT:    vscalefsh %xmm1, %xmm0, %xmm0
+; AVX512VLFP16-NEXT:    retq
 entry:
   %r = tail call fast half @llvm.ldexp.f16.i32(half %x, i32 %exp)
   ret half %r
@@ -59,30 +75,24 @@ declare fp128 @ldexpl(fp128, i32) memory(none)
 define <4 x float> @test_ldexp_4xfloat(<4 x float> %x, <4 x i32> %exp) nounwind {
 ; AVX512-LABEL: test_ldexp_4xfloat:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vcvtdq2ps %xmm1, %xmm2
-; AVX512-NEXT:    vscalefss %xmm2, %xmm0, %xmm2
-; AVX512-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm4 = xmm1[1,1,1,1]
-; AVX512-NEXT:    vcvtdq2ps %xmm4, %xmm4
-; AVX512-NEXT:    vscalefss %xmm4, %xmm3, %xmm3
-; AVX512-NEXT:    vunpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; AVX512-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm4 = xmm1[2,3,2,3]
-; AVX512-NEXT:    vcvtdq2ps %xmm4, %xmm4
-; AVX512-NEXT:    vscalefss %xmm4, %xmm3, %xmm3
-; AVX512-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
+; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512-NEXT:    vcvtdq2ps %xmm1, %xmm1
-; AVX512-NEXT:    vscalefss %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
+; AVX512-NEXT:    vscalefps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
 ;
 ; AVX512VL-LABEL: test_ldexp_4xfloat:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vcvtdq2ps %xmm1, %xmm1
 ; AVX512VL-NEXT:    vscalefps %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
+;
+; AVX512VLFP16-LABEL: test_ldexp_4xfloat:
+; AVX512VLFP16:       # %bb.0:
+; AVX512VLFP16-NEXT:    vcvtdq2ps %xmm1, %xmm1
+; AVX512VLFP16-NEXT:    vscalefps %xmm1, %xmm0, %xmm0
+; AVX512VLFP16-NEXT:    retq
   %r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> %x, <4 x i32> %exp)
   ret <4 x float> %r
 }
@@ -107,50 +117,23 @@ declare <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double>, <2 x i32>)
 define <8 x float> @test_ldexp_8xfloat(<8 x float> %x, <8 x i32> %exp) nounwind {
 ; AVX512-LABEL: test_ldexp_8xfloat:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX512-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX512-NEXT:    vcvtdq2ps %xmm3, %xmm4
-; AVX512-NEXT:    vscalefss %xmm4, %xmm2, %xmm4
-; AVX512-NEXT:    vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm6 = xmm3[1,1,1,1]
-; AVX512-NEXT:    vcvtdq2ps %xmm6, %xmm6
-; AVX512-NEXT:    vscalefss %xmm6, %xmm5, %xmm5
-; AVX512-NEXT:    vunpcklps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
-; AVX512-NEXT:    vshufpd {{.*#+}} xmm5 = xmm2[1,0]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm6 = xmm3[2,3,2,3]
-; AVX512-NEXT:    vcvtdq2ps %xmm6, %xmm6
-; AVX512-NEXT:    vscalefss %xmm6, %xmm5, %xmm5
-; AVX512-NEXT:    vmovlhps {{.*#+}} xmm4 = xmm4[0],xmm5[0]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm2 = xmm2[3,3,3,3]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm3 = xmm3[3,3,3,3]
-; AVX512-NEXT:    vcvtdq2ps %xmm3, %xmm3
-; AVX512-NEXT:    vscalefss %xmm3, %xmm2, %xmm2
-; AVX512-NEXT:    vinsertps {{.*#+}} xmm2 = xmm4[0,1,2],xmm2[0]
-; AVX512-NEXT:    vcvtdq2ps %xmm1, %xmm3
-; AVX512-NEXT:    vscalefss %xmm3, %xmm0, %xmm3
-; AVX512-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm5 = xmm1[1,1,1,1]
-; AVX512-NEXT:    vcvtdq2ps %xmm5, %xmm5
-; AVX512-NEXT:    vscalefss %xmm5, %xmm4, %xmm4
-; AVX512-NEXT:    vunpcklps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
-; AVX512-NEXT:    vshufpd {{.*#+}} xmm4 = xmm0[1,0]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm5 = xmm1[2,3,2,3]
-; AVX512-NEXT:    vcvtdq2ps %xmm5, %xmm5
-; AVX512-NEXT:    vscalefss %xmm5, %xmm4, %xmm4
-; AVX512-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; AVX512-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
-; AVX512-NEXT:    vcvtdq2ps %xmm1, %xmm1
-; AVX512-NEXT:    vscalefss %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
-; AVX512-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT:    vcvtdq2ps %ymm1, %ymm1
+; AVX512-NEXT:    vscalefps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512-NEXT:    retq
 ;
 ; AVX512VL-LABEL: test_ldexp_8xfloat:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vcvtdq2ps %ymm1, %ymm1
 ; AVX512VL-NEXT:    vscalefps %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
+;
+; AVX512VLFP16-LABEL: test_ldexp_8xfloat:
+; AVX512VLFP16:       # %bb.0:
+; AVX512VLFP16-NEXT:    vcvtdq2ps %ymm1, %ymm1
+; AVX512VLFP16-NEXT:    vscalefps %ymm1, %ymm0, %ymm0
+; AVX512VLFP16-NEXT:    retq
   %r = call <8 x float> @llvm.ldexp.v8f32.v8i32(<8 x float> %x, <8 x i32> %exp)
   ret <8 x float> %r
 }
@@ -159,30 +142,23 @@ declare <8 x float> @llvm.ldexp.v8f32.v8i32(<8 x float>, <8 x i32>)
 define <4 x double> @test_ldexp_4xdouble(<4 x double> %x, <4 x i32> %exp) nounwind {
 ; AVX512-LABEL: test_ldexp_4xdouble:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX512-NEXT:    vshufps {{.*#+}} xmm3 = xmm1[2,3,2,3]
-; AVX512-NEXT:    vcvtdq2pd %xmm3, %xmm3
-; AVX512-NEXT:    vscalefsd %xmm3, %xmm2, %xmm3
-; AVX512-NEXT:    vcvtdq2pd %xmm1, %xmm4
-; AVX512-NEXT:    vscalefsd %xmm4, %xmm0, %xmm4
-; AVX512-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
-; AVX512-NEXT:    vshufps {{.*#+}} xmm4 = xmm1[3,3,3,3]
-; AVX512-NEXT:    vcvtdq2pd %xmm4, %xmm4
-; AVX512-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
-; AVX512-NEXT:    vscalefsd %xmm4, %xmm2, %xmm2
-; AVX512-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; AVX512-NEXT:    vcvtdq2pd %xmm1, %xmm1
-; AVX512-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512-NEXT:    vscalefsd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX512-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm3[0],ymm0[0],ymm3[2],ymm0[2]
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT:    vcvtdq2pd %xmm1, %ymm1
+; AVX512-NEXT:    vscalefpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512-NEXT:    retq
 ;
 ; AVX512VL-LABEL: test_ldexp_4xdouble:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vcvtdq2pd %xmm1, %ymm1
 ; AVX512VL-NEXT:    vscalefpd %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
+;
+; AVX512VLFP16-LABEL: test_ldexp_4xdouble:
+; AVX512VLFP16:       # %bb.0:
+; AVX512VLFP16-NEXT:    vcvtdq2pd %xmm1, %ymm1
+; AVX512VLFP16-NEXT:    vscalefpd %ymm1, %ymm0, %ymm0
+; AVX512VLFP16-NEXT:    retq
   %r = call <4 x double> @llvm.ldexp.v4f64.v4i32(<4 x double> %x, <4 x i32> %exp)
   ret <4 x double> %r
 }
@@ -210,3 +186,5 @@ define <8 x double> @test_ldexp_8xdouble(<8 x double> %x, <8 x i32> %exp) nounwi
 }
 declare <8 x double> @llvm.ldexp.v8f64.v8i32(<8 x double>, <8 x i32>)
 
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX512VLF: {{.*}}
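
For context on what the new check lines encode (an editorial sketch, not part of the diff): the AVX-512 vscalef* instructions compute x * 2^floor(y) for a floating-point y, so once the i32 exponents are converted with vcvtdq2ps/vcvtdq2pd, the llvm.ldexp.* intrinsics lower to a single scalef instruction. With only +avx512f, the 128/256-bit operations are first widened to 512-bit zmm registers (hence the "# kill" register annotations and vzeroupper in the AVX512 checks), while +avx512vl enables the native xmm/ymm forms. A minimal standalone reproducer, with a hypothetical file name, is below; the expected output mirrors the AVX512VL checks above.

; reproducer.ll (hypothetical name)
; Run: llc < reproducer.ll -mtriple=x86_64-- -mattr=+avx512vl
; Expected lowering, per the AVX512VL checks above:
;   vcvtdq2ps %ymm1, %ymm1
;   vscalefps %ymm1, %ymm0, %ymm0
define <8 x float> @ldexp_v8f32(<8 x float> %x, <8 x i32> %exp) nounwind {
  %r = call <8 x float> @llvm.ldexp.v8f32.v8i32(<8 x float> %x, <8 x i32> %exp)
  ret <8 x float> %r
}
declare <8 x float> @llvm.ldexp.v8f32.v8i32(<8 x float>, <8 x i32>)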