@@ -225,9 +225,9 @@ define <16 x i8> @produceShuffleVectorForByte(i8 zeroext %0) nounwind {
225
225
; X86-AVX512-NEXT: pushl %esi
226
226
; X86-AVX512-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %xmm0
227
227
; X86-AVX512-NEXT: vmovd %xmm0, %eax
228
- ; X86-AVX512-NEXT: kmovd %eax, %k0
229
- ; X86-AVX512-NEXT: knotw %k0, %k1
230
- ; X86-AVX512-NEXT: vmovdqu8 {{.*#+}} xmm0 {%k1} {z} = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u]
228
+ ; X86-AVX512-NEXT: kmovd %eax, %k1
229
+ ; X86-AVX512-NEXT: knotw %k1, %k2
230
+ ; X86-AVX512-NEXT: vmovdqu8 {{.*#+}} xmm0 {%k2} {z} = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u]
231
231
; X86-AVX512-NEXT: vpextrd $1, %xmm0, %eax
232
232
; X86-AVX512-NEXT: vmovd %xmm0, %edx
233
233
; X86-AVX512-NEXT: movl $286331152, %ecx # imm = 0x11111110
@@ -247,9 +247,9 @@ define <16 x i8> @produceShuffleVectorForByte(i8 zeroext %0) nounwind {
247
247
; X86-AVX512-NEXT: addl %edx, %eax
248
248
; X86-AVX512-NEXT: vmovd %esi, %xmm1
249
249
; X86-AVX512-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
250
- ; X86-AVX512-NEXT: vmovdqu8 %xmm0, %xmm1 {%k1}
251
- ; X86-AVX512-NEXT: vpsrlw $4, %xmm1, %xmm0
252
- ; X86-AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
250
+ ; X86-AVX512-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1}
251
+ ; X86-AVX512-NEXT: vpsrlw $4, %xmm0, %xmm1
252
+ ; X86-AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
253
253
; X86-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
254
254
; X86-AVX512-NEXT: popl %esi
255
255
; X86-AVX512-NEXT: popl %edi
@@ -258,9 +258,9 @@ define <16 x i8> @produceShuffleVectorForByte(i8 zeroext %0) nounwind {
258
258
;
259
259
; X64-AVX512-LABEL: produceShuffleVectorForByte:
260
260
; X64-AVX512: # %bb.0: # %entry
261
- ; X64-AVX512-NEXT: kmovd %edi, %k0
262
- ; X64-AVX512-NEXT: knotw %k0, %k1
263
- ; X64-AVX512-NEXT: vmovdqu8 {{.*#+}} xmm0 {%k1} {z} = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u]
261
+ ; X64-AVX512-NEXT: kmovd %edi, %k1
262
+ ; X64-AVX512-NEXT: knotw %k1, %k2
263
+ ; X64-AVX512-NEXT: vmovdqu8 {{.*#+}} xmm0 {%k2} {z} = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u]
264
264
; X64-AVX512-NEXT: vmovq %xmm0, %rax
265
265
; X64-AVX512-NEXT: movabsq $1229782938247303440, %rcx # imm = 0x1111111111111110
266
266
; X64-AVX512-NEXT: movabsq $76861433640456465, %rdx # imm = 0x111111111111111
@@ -269,9 +269,9 @@ define <16 x i8> @produceShuffleVectorForByte(i8 zeroext %0) nounwind {
269
269
; X64-AVX512-NEXT: vmovq %rax, %xmm0
270
270
; X64-AVX512-NEXT: imulq %rcx, %rdx
271
271
; X64-AVX512-NEXT: vmovq %rdx, %xmm1
272
- ; X64-AVX512-NEXT: vmovdqu8 %xmm0, %xmm1 {%k1}
273
- ; X64-AVX512-NEXT: vpsrlw $4, %xmm1, %xmm0
274
- ; X64-AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
272
+ ; X64-AVX512-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1}
273
+ ; X64-AVX512-NEXT: vpsrlw $4, %xmm0, %xmm1
274
+ ; X64-AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
275
275
; X64-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
276
276
; X64-AVX512-NEXT: retq
277
277
entry:
0 commit comments