@@ -892,10 +892,7 @@ define void @PR32547(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float>
892892; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
893893; AVX512F-NEXT: vcmpltps %zmm1, %zmm0, %k0
894894; AVX512F-NEXT: vcmpltps %zmm3, %zmm2, %k1
895- ; AVX512F-NEXT: kshiftlw $8, %k0, %k0
896- ; AVX512F-NEXT: kshiftlw $8, %k1, %k1
897- ; AVX512F-NEXT: kshiftrw $8, %k1, %k1
898- ; AVX512F-NEXT: korw %k1, %k0, %k1
895+ ; AVX512F-NEXT: kunpckbw %k1, %k0, %k1
899896; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
900897; AVX512F-NEXT: vmovaps %zmm0, (%rdi) {%k1}
901898; AVX512F-NEXT: vzeroupper
@@ -905,8 +902,7 @@ define void @PR32547(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float>
905902; AVX512VL: # %bb.0: # %entry
906903; AVX512VL-NEXT: vcmpltps %ymm1, %ymm0, %k0
907904; AVX512VL-NEXT: vcmpltps %ymm3, %ymm2, %k1
908- ; AVX512VL-NEXT: kshiftlw $8, %k0, %k0
909- ; AVX512VL-NEXT: korw %k1, %k0, %k1
905+ ; AVX512VL-NEXT: kunpckbw %k1, %k0, %k1
910906; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
911907; AVX512VL-NEXT: vmovaps %zmm0, (%rdi) {%k1}
912908; AVX512VL-NEXT: vzeroupper
@@ -916,8 +912,7 @@ define void @PR32547(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float>
916912; VL_BW_DQ: # %bb.0: # %entry
917913; VL_BW_DQ-NEXT: vcmpltps %ymm1, %ymm0, %k0
918914; VL_BW_DQ-NEXT: vcmpltps %ymm3, %ymm2, %k1
919- ; VL_BW_DQ-NEXT: kshiftlw $8, %k0, %k0
920- ; VL_BW_DQ-NEXT: korw %k1, %k0, %k1
915+ ; VL_BW_DQ-NEXT: kunpckbw %k1, %k0, %k1
921916; VL_BW_DQ-NEXT: vxorps %xmm0, %xmm0, %xmm0
922917; VL_BW_DQ-NEXT: vmovaps %zmm0, (%rdi) {%k1}
923918; VL_BW_DQ-NEXT: vzeroupper
@@ -945,10 +940,7 @@ define void @PR32547_swap(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x f
945940; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
946941; AVX512F-NEXT: vcmpltps %zmm1, %zmm0, %k0
947942; AVX512F-NEXT: vcmpltps %zmm3, %zmm2, %k1
948- ; AVX512F-NEXT: kshiftlw $8, %k0, %k0
949- ; AVX512F-NEXT: kshiftlw $8, %k1, %k1
950- ; AVX512F-NEXT: kshiftrw $8, %k1, %k1
951- ; AVX512F-NEXT: korw %k0, %k1, %k1
943+ ; AVX512F-NEXT: kunpckbw %k1, %k0, %k1
952944; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
953945; AVX512F-NEXT: vmovaps %zmm0, (%rdi) {%k1}
954946; AVX512F-NEXT: vzeroupper
@@ -958,8 +950,7 @@ define void @PR32547_swap(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x f
958950; AVX512VL: # %bb.0: # %entry
959951; AVX512VL-NEXT: vcmpltps %ymm1, %ymm0, %k0
960952; AVX512VL-NEXT: vcmpltps %ymm3, %ymm2, %k1
961- ; AVX512VL-NEXT: kshiftlw $8, %k0, %k0
962- ; AVX512VL-NEXT: korw %k0, %k1, %k1
953+ ; AVX512VL-NEXT: kunpckbw %k1, %k0, %k1
963954; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
964955; AVX512VL-NEXT: vmovaps %zmm0, (%rdi) {%k1}
965956; AVX512VL-NEXT: vzeroupper
@@ -969,8 +960,7 @@ define void @PR32547_swap(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x f
969960; VL_BW_DQ: # %bb.0: # %entry
970961; VL_BW_DQ-NEXT: vcmpltps %ymm1, %ymm0, %k0
971962; VL_BW_DQ-NEXT: vcmpltps %ymm3, %ymm2, %k1
972- ; VL_BW_DQ-NEXT: kshiftlw $8, %k0, %k0
973- ; VL_BW_DQ-NEXT: korw %k0, %k1, %k1
963+ ; VL_BW_DQ-NEXT: kunpckbw %k1, %k0, %k1
974964; VL_BW_DQ-NEXT: vxorps %xmm0, %xmm0, %xmm0
975965; VL_BW_DQ-NEXT: vmovaps %zmm0, (%rdi) {%k1}
976966; VL_BW_DQ-NEXT: vzeroupper
0 commit comments