@@ -79,38 +79,64 @@ define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
7979; CHECK-SSE-NEXT: .cfi_def_cfa_offset 8
8080; CHECK-SSE-NEXT: retq
8181;
82- ; CHECK-AVX-LABEL: fmul_pow2_ldexp_4xfloat:
83- ; CHECK-AVX: # %bb.0:
84- ; CHECK-AVX-NEXT: subq $40, %rsp
85- ; CHECK-AVX-NEXT: .cfi_def_cfa_offset 48
86- ; CHECK-AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
87- ; CHECK-AVX-NEXT: vextractps $1, %xmm0, %edi
88- ; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
89- ; CHECK-AVX-NEXT: callq ldexpf@PLT
90- ; CHECK-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
91- ; CHECK-AVX-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
92- ; CHECK-AVX-NEXT: vmovd %xmm0, %edi
93- ; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
94- ; CHECK-AVX-NEXT: callq ldexpf@PLT
95- ; CHECK-AVX-NEXT: vinsertps $16, (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
96- ; CHECK-AVX-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[2,3]
97- ; CHECK-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
98- ; CHECK-AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
99- ; CHECK-AVX-NEXT: vextractps $2, %xmm0, %edi
100- ; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
101- ; CHECK-AVX-NEXT: callq ldexpf@PLT
102- ; CHECK-AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
103- ; CHECK-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
104- ; CHECK-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
105- ; CHECK-AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
106- ; CHECK-AVX-NEXT: vextractps $3, %xmm0, %edi
107- ; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
108- ; CHECK-AVX-NEXT: callq ldexpf@PLT
109- ; CHECK-AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
110- ; CHECK-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
111- ; CHECK-AVX-NEXT: addq $40, %rsp
112- ; CHECK-AVX-NEXT: .cfi_def_cfa_offset 8
113- ; CHECK-AVX-NEXT: retq
82+ ; CHECK-AVX2-LABEL: fmul_pow2_ldexp_4xfloat:
83+ ; CHECK-AVX2: # %bb.0:
84+ ; CHECK-AVX2-NEXT: subq $40, %rsp
85+ ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 48
86+ ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
87+ ; CHECK-AVX2-NEXT: vextractps $1, %xmm0, %edi
88+ ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
89+ ; CHECK-AVX2-NEXT: callq ldexpf@PLT
90+ ; CHECK-AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
91+ ; CHECK-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
92+ ; CHECK-AVX2-NEXT: vmovd %xmm0, %edi
93+ ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
94+ ; CHECK-AVX2-NEXT: callq ldexpf@PLT
95+ ; CHECK-AVX2-NEXT: vinsertps $16, (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
96+ ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[2,3]
97+ ; CHECK-AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
98+ ; CHECK-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
99+ ; CHECK-AVX2-NEXT: vextractps $2, %xmm0, %edi
100+ ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
101+ ; CHECK-AVX2-NEXT: callq ldexpf@PLT
102+ ; CHECK-AVX2-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
103+ ; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
104+ ; CHECK-AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
105+ ; CHECK-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
106+ ; CHECK-AVX2-NEXT: vextractps $3, %xmm0, %edi
107+ ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
108+ ; CHECK-AVX2-NEXT: callq ldexpf@PLT
109+ ; CHECK-AVX2-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
110+ ; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
111+ ; CHECK-AVX2-NEXT: addq $40, %rsp
112+ ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
113+ ; CHECK-AVX2-NEXT: retq
114+ ;
115+ ; CHECK-ONLY-AVX512F-LABEL: fmul_pow2_ldexp_4xfloat:
116+ ; CHECK-ONLY-AVX512F: # %bb.0:
117+ ; CHECK-ONLY-AVX512F-NEXT: vcvtdq2ps %xmm0, %xmm1
118+ ; CHECK-ONLY-AVX512F-NEXT: vmovss {{.*#+}} xmm2 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
119+ ; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm1, %xmm2, %xmm1
120+ ; CHECK-ONLY-AVX512F-NEXT: vshufps {{.*#+}} xmm3 = xmm0[1,1,1,1]
121+ ; CHECK-ONLY-AVX512F-NEXT: vcvtdq2ps %xmm3, %xmm3
122+ ; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm3, %xmm2, %xmm3
123+ ; CHECK-ONLY-AVX512F-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
124+ ; CHECK-ONLY-AVX512F-NEXT: vshufps {{.*#+}} xmm3 = xmm0[2,3,2,3]
125+ ; CHECK-ONLY-AVX512F-NEXT: vcvtdq2ps %xmm3, %xmm3
126+ ; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm3, %xmm2, %xmm3
127+ ; CHECK-ONLY-AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
128+ ; CHECK-ONLY-AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
129+ ; CHECK-ONLY-AVX512F-NEXT: vcvtdq2ps %xmm0, %xmm0
130+ ; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm0, %xmm2, %xmm0
131+ ; CHECK-ONLY-AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
132+ ; CHECK-ONLY-AVX512F-NEXT: retq
133+ ;
134+ ; CHECK-SKX-LABEL: fmul_pow2_ldexp_4xfloat:
135+ ; CHECK-SKX: # %bb.0:
136+ ; CHECK-SKX-NEXT: vcvtdq2ps %xmm0, %xmm0
137+ ; CHECK-SKX-NEXT: vbroadcastss {{.*#+}} xmm1 = [9.0E+0,9.0E+0,9.0E+0,9.0E+0]
138+ ; CHECK-SKX-NEXT: vscalefps %xmm0, %xmm1, %xmm0
139+ ; CHECK-SKX-NEXT: retq
114140 %r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, <4 x i32> %i)
115141 ret <4 x float> %r
116142}
@@ -560,82 +586,109 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
560586; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
561587; CHECK-AVX2-NEXT: retq
562588;
563- ; CHECK-AVX512F-LABEL: fmul_pow2_ldexp_8xhalf:
564- ; CHECK-AVX512F: # %bb.0:
565- ; CHECK-AVX512F-NEXT: subq $72, %rsp
566- ; CHECK-AVX512F-NEXT: .cfi_def_cfa_offset 80
567- ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
568- ; CHECK-AVX512F-NEXT: vpextrw $7, %xmm0, %eax
569- ; CHECK-AVX512F-NEXT: movswl %ax, %edi
570- ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
571- ; CHECK-AVX512F-NEXT: callq ldexpf@PLT
572- ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
573- ; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
574- ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
575- ; CHECK-AVX512F-NEXT: vpextrw $6, %xmm0, %eax
576- ; CHECK-AVX512F-NEXT: movswl %ax, %edi
577- ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
578- ; CHECK-AVX512F-NEXT: callq ldexpf@PLT
579- ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
580- ; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
581- ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
582- ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
583- ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
584- ; CHECK-AVX512F-NEXT: vpextrw $5, %xmm0, %eax
585- ; CHECK-AVX512F-NEXT: movswl %ax, %edi
586- ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
587- ; CHECK-AVX512F-NEXT: callq ldexpf@PLT
588- ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
589- ; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
590- ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
591- ; CHECK-AVX512F-NEXT: vpextrw $4, %xmm0, %eax
592- ; CHECK-AVX512F-NEXT: movswl %ax, %edi
593- ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
594- ; CHECK-AVX512F-NEXT: callq ldexpf@PLT
595- ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
596- ; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
597- ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
598- ; CHECK-AVX512F-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
599- ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
600- ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
601- ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
602- ; CHECK-AVX512F-NEXT: vpextrw $3, %xmm0, %eax
603- ; CHECK-AVX512F-NEXT: movswl %ax, %edi
604- ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
605- ; CHECK-AVX512F-NEXT: callq ldexpf@PLT
606- ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
607- ; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
608- ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
609- ; CHECK-AVX512F-NEXT: vpextrw $2, %xmm0, %eax
610- ; CHECK-AVX512F-NEXT: movswl %ax, %edi
611- ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
612- ; CHECK-AVX512F-NEXT: callq ldexpf@PLT
613- ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
614- ; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
615- ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
616- ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
617- ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
618- ; CHECK-AVX512F-NEXT: vpextrw $1, %xmm0, %eax
619- ; CHECK-AVX512F-NEXT: movswl %ax, %edi
620- ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
621- ; CHECK-AVX512F-NEXT: callq ldexpf@PLT
622- ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
623- ; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
624- ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
625- ; CHECK-AVX512F-NEXT: vmovd %xmm0, %eax
626- ; CHECK-AVX512F-NEXT: movswl %ax, %edi
627- ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
628- ; CHECK-AVX512F-NEXT: callq ldexpf@PLT
629- ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
630- ; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
631- ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
632- ; CHECK-AVX512F-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
633- ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
634- ; CHECK-AVX512F-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
635- ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0]
636- ; CHECK-AVX512F-NEXT: addq $72, %rsp
637- ; CHECK-AVX512F-NEXT: .cfi_def_cfa_offset 8
638- ; CHECK-AVX512F-NEXT: retq
589+ ; CHECK-ONLY-AVX512F-LABEL: fmul_pow2_ldexp_8xhalf:
590+ ; CHECK-ONLY-AVX512F: # %bb.0:
591+ ; CHECK-ONLY-AVX512F-NEXT: vpextrw $7, %xmm0, %eax
592+ ; CHECK-ONLY-AVX512F-NEXT: cwtl
593+ ; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm2
594+ ; CHECK-ONLY-AVX512F-NEXT: vmovss {{.*#+}} xmm1 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
595+ ; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm2, %xmm1, %xmm2
596+ ; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm2, %xmm2
597+ ; CHECK-ONLY-AVX512F-NEXT: vpextrw $6, %xmm0, %eax
598+ ; CHECK-ONLY-AVX512F-NEXT: cwtl
599+ ; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
600+ ; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm3, %xmm1, %xmm3
601+ ; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm3, %xmm3
602+ ; CHECK-ONLY-AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
603+ ; CHECK-ONLY-AVX512F-NEXT: vpextrw $5, %xmm0, %eax
604+ ; CHECK-ONLY-AVX512F-NEXT: cwtl
605+ ; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
606+ ; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm3, %xmm1, %xmm3
607+ ; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm3, %xmm3
608+ ; CHECK-ONLY-AVX512F-NEXT: vpextrw $4, %xmm0, %eax
609+ ; CHECK-ONLY-AVX512F-NEXT: cwtl
610+ ; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
611+ ; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm4, %xmm1, %xmm4
612+ ; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm4, %xmm4
613+ ; CHECK-ONLY-AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
614+ ; CHECK-ONLY-AVX512F-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
615+ ; CHECK-ONLY-AVX512F-NEXT: vpextrw $3, %xmm0, %eax
616+ ; CHECK-ONLY-AVX512F-NEXT: cwtl
617+ ; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
618+ ; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm3, %xmm1, %xmm3
619+ ; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm3, %xmm3
620+ ; CHECK-ONLY-AVX512F-NEXT: vpextrw $2, %xmm0, %eax
621+ ; CHECK-ONLY-AVX512F-NEXT: cwtl
622+ ; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
623+ ; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm4, %xmm1, %xmm4
624+ ; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm4, %xmm4
625+ ; CHECK-ONLY-AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
626+ ; CHECK-ONLY-AVX512F-NEXT: vpextrw $1, %xmm0, %eax
627+ ; CHECK-ONLY-AVX512F-NEXT: cwtl
628+ ; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
629+ ; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm4, %xmm1, %xmm4
630+ ; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm4, %xmm4
631+ ; CHECK-ONLY-AVX512F-NEXT: vmovd %xmm0, %eax
632+ ; CHECK-ONLY-AVX512F-NEXT: cwtl
633+ ; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
634+ ; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm0, %xmm1, %xmm0
635+ ; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
636+ ; CHECK-ONLY-AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
637+ ; CHECK-ONLY-AVX512F-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
638+ ; CHECK-ONLY-AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
639+ ; CHECK-ONLY-AVX512F-NEXT: retq
640+ ;
641+ ; CHECK-SKX-LABEL: fmul_pow2_ldexp_8xhalf:
642+ ; CHECK-SKX: # %bb.0:
643+ ; CHECK-SKX-NEXT: vpextrw $7, %xmm0, %eax
644+ ; CHECK-SKX-NEXT: cwtl
645+ ; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm1
646+ ; CHECK-SKX-NEXT: vmovss {{.*#+}} xmm2 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
647+ ; CHECK-SKX-NEXT: vscalefss %xmm1, %xmm2, %xmm1
648+ ; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm1, %xmm1
649+ ; CHECK-SKX-NEXT: vpextrw $6, %xmm0, %eax
650+ ; CHECK-SKX-NEXT: cwtl
651+ ; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
652+ ; CHECK-SKX-NEXT: vscalefss %xmm3, %xmm2, %xmm3
653+ ; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm3, %xmm3
654+ ; CHECK-SKX-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
655+ ; CHECK-SKX-NEXT: vpextrw $5, %xmm0, %eax
656+ ; CHECK-SKX-NEXT: cwtl
657+ ; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
658+ ; CHECK-SKX-NEXT: vscalefss %xmm3, %xmm2, %xmm3
659+ ; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm3, %xmm3
660+ ; CHECK-SKX-NEXT: vpextrw $4, %xmm0, %eax
661+ ; CHECK-SKX-NEXT: cwtl
662+ ; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
663+ ; CHECK-SKX-NEXT: vscalefss %xmm4, %xmm2, %xmm4
664+ ; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm4, %xmm4
665+ ; CHECK-SKX-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
666+ ; CHECK-SKX-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
667+ ; CHECK-SKX-NEXT: vpextrw $3, %xmm0, %eax
668+ ; CHECK-SKX-NEXT: cwtl
669+ ; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
670+ ; CHECK-SKX-NEXT: vscalefss %xmm3, %xmm2, %xmm3
671+ ; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm3, %xmm3
672+ ; CHECK-SKX-NEXT: vpextrw $2, %xmm0, %eax
673+ ; CHECK-SKX-NEXT: cwtl
674+ ; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
675+ ; CHECK-SKX-NEXT: vscalefss %xmm4, %xmm2, %xmm4
676+ ; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm4, %xmm4
677+ ; CHECK-SKX-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
678+ ; CHECK-SKX-NEXT: vpextrw $1, %xmm0, %eax
679+ ; CHECK-SKX-NEXT: cwtl
680+ ; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
681+ ; CHECK-SKX-NEXT: vscalefss %xmm4, %xmm2, %xmm4
682+ ; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm4, %xmm4
683+ ; CHECK-SKX-NEXT: vmovd %xmm0, %eax
684+ ; CHECK-SKX-NEXT: cwtl
685+ ; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
686+ ; CHECK-SKX-NEXT: vscalefss %xmm0, %xmm2, %xmm0
687+ ; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
688+ ; CHECK-SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
689+ ; CHECK-SKX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
690+ ; CHECK-SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
691+ ; CHECK-SKX-NEXT: retq
639692 %r = call <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half> <half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000>, <8 x i16> %i)
640693 ret <8 x half> %r
641694}
@@ -1769,3 +1822,5 @@ define x86_fp80 @pr128528(i1 %cond) {
17691822 %mul = fmul x86_fp80 %conv, 0xK4007D055555555555800
17701823 ret x86_fp80 %mul
17711824}
1825+ ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1826+ ; CHECK-AVX512F: {{.*}}
0 commit comments