@@ -312,11 +312,46 @@ define <4 x float> @ulto4f32(<4 x i64> %a) {
312312 ret <4 x float > %b
313313}
314314
; Converts a <4 x i64> vector to <4 x float> with `uitofp` carrying the `nneg`
; flag, and returns the result.
; The expected code below uses vcvtsi2ss per element for the NODQ prefix and a
; single vcvtqq2ps for the VLDQ and DQNOVL prefixes (on ymm and on a widened
; zmm respectively).
; NOTE(review): these look like the signed-conversion forms, presumably chosen
; because `nneg` allows the input to be treated as signed; confirm against the
; X86 lowering code.
315+ define <4 x float > @ulto4f32_nneg (<4 x i64 > %a ) {
316+ ; NODQ-LABEL: ulto4f32_nneg:
317+ ; NODQ: # %bb.0:
318+ ; NODQ-NEXT: vpextrq $1, %xmm0, %rax
319+ ; NODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
320+ ; NODQ-NEXT: vmovq %xmm0, %rax
321+ ; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
322+ ; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
323+ ; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0
324+ ; NODQ-NEXT: vmovq %xmm0, %rax
325+ ; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
326+ ; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
327+ ; NODQ-NEXT: vpextrq $1, %xmm0, %rax
328+ ; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
329+ ; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
330+ ; NODQ-NEXT: vzeroupper
331+ ; NODQ-NEXT: retq
332+ ;
333+ ; VLDQ-LABEL: ulto4f32_nneg:
334+ ; VLDQ: # %bb.0:
335+ ; VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0
336+ ; VLDQ-NEXT: vzeroupper
337+ ; VLDQ-NEXT: retq
338+ ;
339+ ; DQNOVL-LABEL: ulto4f32_nneg:
340+ ; DQNOVL: # %bb.0:
341+ ; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
342+ ; DQNOVL-NEXT: vcvtqq2ps %zmm0, %ymm0
343+ ; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
344+ ; DQNOVL-NEXT: vzeroupper
345+ ; DQNOVL-NEXT: retq
; The IR under test: one unsigned int-to-float conversion marked `nneg`.
346+ %b = uitofp nneg <4 x i64 > %a to <4 x float >
347+ ret <4 x float > %b
348+ }
349+
315350define <8 x double > @ulto8f64 (<8 x i64 > %a ) {
316351; NODQ-LABEL: ulto8f64:
317352; NODQ: # %bb.0:
318353; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
319- ; NODQ-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, % zmm1
354+ ; NODQ-NEXT: vpternlogq {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
320355; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0
321356; NODQ-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
322357; NODQ-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
@@ -342,14 +377,14 @@ define <16 x double> @ulto16f64(<16 x i64> %a) {
342377; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm2 = [4294967295,4294967295,4294967295,4294967295,4294967295,4294967295,4294967295,4294967295]
343378; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm3 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
344379; NODQ-NEXT: vmovdqa64 %zmm3, %zmm4
345- ; NODQ-NEXT: vpternlogq $248, %zmm2, %zmm0, % zmm4
380+ ; NODQ-NEXT: vpternlogq {{.*#+}} zmm4 = zmm4 | (zmm0 & zmm2)
346381; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0
347382; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm5 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
348383; NODQ-NEXT: vporq %zmm5, %zmm0, %zmm0
349384; NODQ-NEXT: vbroadcastsd {{.*#+}} zmm6 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
350385; NODQ-NEXT: vsubpd %zmm6, %zmm0, %zmm0
351386; NODQ-NEXT: vaddpd %zmm0, %zmm4, %zmm0
352- ; NODQ-NEXT: vpternlogq $248, %zmm2, %zmm1, % zmm3
387+ ; NODQ-NEXT: vpternlogq {{.*#+}} zmm3 = zmm3 | (zmm1 & zmm2)
353388; NODQ-NEXT: vpsrlq $32, %zmm1, %zmm1
354389; NODQ-NEXT: vporq %zmm5, %zmm1, %zmm1
355390; NODQ-NEXT: vsubpd %zmm6, %zmm1, %zmm1
@@ -1483,7 +1518,7 @@ define <16 x float> @sbto16f32(<16 x i32> %a) {
14831518; NODQ: # %bb.0:
14841519; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
14851520; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1486- ; NODQ-NEXT: vpternlogd $255, % zmm0, %zmm0, %zmm0 {%k1} {z}
1521+ ; NODQ-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
14871522; NODQ-NEXT: vcvtdq2ps %zmm0, %zmm0
14881523; NODQ-NEXT: retq
14891524;
@@ -1564,7 +1599,7 @@ define <16 x double> @sbto16f64(<16 x double> %a) {
15641599; NODQ-NEXT: vcmpltpd %zmm0, %zmm2, %k0
15651600; NODQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
15661601; NODQ-NEXT: kunpckbw %k0, %k1, %k1
1567- ; NODQ-NEXT: vpternlogd $255, % zmm1, %zmm1, %zmm1 {%k1} {z}
1602+ ; NODQ-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1
15681603; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm0
15691604; NODQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
15701605; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm1
@@ -1603,7 +1638,7 @@ define <8 x double> @sbto8f64(<8 x double> %a) {
16031638; NOVLDQ: # %bb.0:
16041639; NOVLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
16051640; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
1606- ; NOVLDQ-NEXT: vpternlogd $255, % zmm0, %zmm0, %zmm0 {%k1} {z}
1641+ ; NOVLDQ-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
16071642; NOVLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
16081643; NOVLDQ-NEXT: retq
16091644;
@@ -1864,7 +1899,7 @@ define <16 x float> @ubto16f32(<16 x i32> %a) {
18641899; NODQ: # %bb.0:
18651900; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
18661901; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1867- ; NODQ-NEXT: vpternlogd $255, % zmm0, %zmm0, %zmm0 {%k1} {z}
1902+ ; NODQ-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
18681903; NODQ-NEXT: vpsrld $31, %zmm0, %zmm0
18691904; NODQ-NEXT: vcvtdq2ps %zmm0, %zmm0
18701905; NODQ-NEXT: retq
@@ -1894,7 +1929,7 @@ define <16 x double> @ubto16f64(<16 x i32> %a) {
18941929; NODQ: # %bb.0:
18951930; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
18961931; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1897- ; NODQ-NEXT: vpternlogd $255, % zmm0, %zmm0, %zmm0 {%k1} {z}
1932+ ; NODQ-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
18981933; NODQ-NEXT: vpsrld $31, %zmm0, %zmm1
18991934; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm0
19001935; NODQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
0 commit comments