Skip to content

Commit d7de7ac

Browse files
committed
[X86] Raise the latency for vector integer multiply (WriteVecIMul) from 4 to 5 in Skylake scheduler models
Based on uops.info, these should have a 5-cycle latency, as they did on Haswell/Broadwell. I have no additional internal information from Intel. This was also shown as a discrepancy in the spreadsheet that was sent with an early llvm-dev post about llvm-exegesis. It also matches Agner Fog's tables. Differential Revision: https://reviews.llvm.org/D74357
1 parent 9220bbc commit d7de7ac

File tree

16 files changed

+131
-131
lines changed

16 files changed

+131
-131
lines changed

llvm/lib/Target/X86/X86SchedSkylakeClient.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -362,9 +362,9 @@ defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
362362
defm : SKLWriteResPair<WriteVecTest, [SKLPort0,SKLPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions.
363363
defm : SKLWriteResPair<WriteVecTestY, [SKLPort0,SKLPort5], 3, [1,1], 2, 7>;
364364
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
365-
defm : SKLWriteResPair<WriteVecIMul, [SKLPort0] , 4, [1], 1, 5>; // Vector integer multiply.
366-
defm : SKLWriteResPair<WriteVecIMulX, [SKLPort01], 4, [1], 1, 6>;
367-
defm : SKLWriteResPair<WriteVecIMulY, [SKLPort01], 4, [1], 1, 7>;
365+
defm : SKLWriteResPair<WriteVecIMul, [SKLPort0] , 5, [1], 1, 5>; // Vector integer multiply.
366+
defm : SKLWriteResPair<WriteVecIMulX, [SKLPort01], 5, [1], 1, 6>;
367+
defm : SKLWriteResPair<WriteVecIMulY, [SKLPort01], 5, [1], 1, 7>;
368368
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
369369
defm : SKLWriteResPair<WritePMULLD, [SKLPort01], 10, [2], 2, 6>; // Vector PMULLD.
370370
defm : SKLWriteResPair<WritePMULLDY, [SKLPort01], 10, [2], 2, 7>;

llvm/lib/Target/X86/X86SchedSkylakeServer.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -362,10 +362,10 @@ defm : SKXWriteResPair<WriteVecLogicZ,[SKXPort05], 1, [1], 1, 7>;
362362
defm : SKXWriteResPair<WriteVecTest, [SKXPort0,SKXPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions.
363363
defm : SKXWriteResPair<WriteVecTestY, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>;
364364
defm : SKXWriteResPair<WriteVecTestZ, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>;
365-
defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 4, [1], 1, 5>; // Vector integer multiply.
366-
defm : SKXWriteResPair<WriteVecIMulX, [SKXPort01], 4, [1], 1, 6>;
367-
defm : SKXWriteResPair<WriteVecIMulY, [SKXPort01], 4, [1], 1, 7>;
368-
defm : SKXWriteResPair<WriteVecIMulZ, [SKXPort05], 4, [1], 1, 7>;
365+
defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 5, [1], 1, 5>; // Vector integer multiply.
366+
defm : SKXWriteResPair<WriteVecIMulX, [SKXPort01], 5, [1], 1, 6>;
367+
defm : SKXWriteResPair<WriteVecIMulY, [SKXPort01], 5, [1], 1, 7>;
368+
defm : SKXWriteResPair<WriteVecIMulZ, [SKXPort05], 5, [1], 1, 7>;
369369
defm : SKXWriteResPair<WritePMULLD, [SKXPort01], 10, [2], 2, 6>; // Vector PMULLD.
370370
defm : SKXWriteResPair<WritePMULLDY, [SKXPort01], 10, [2], 2, 7>;
371371
defm : SKXWriteResPair<WritePMULLDZ, [SKXPort05], 10, [2], 2, 7>;

llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1477,10 +1477,10 @@ vzeroupper
14771477
# CHECK-NEXT: 2 6 1.00 * vpinsrq $1, (%rax), %xmm1, %xmm2
14781478
# CHECK-NEXT: 2 2 2.00 vpinsrw $1, %eax, %xmm1, %xmm2
14791479
# CHECK-NEXT: 2 6 1.00 * vpinsrw $1, (%rax), %xmm1, %xmm2
1480-
# CHECK-NEXT: 1 4 0.50 vpmaddubsw %xmm0, %xmm1, %xmm2
1481-
# CHECK-NEXT: 2 10 0.50 * vpmaddubsw (%rax), %xmm1, %xmm2
1482-
# CHECK-NEXT: 1 4 0.50 vpmaddwd %xmm0, %xmm1, %xmm2
1483-
# CHECK-NEXT: 2 10 0.50 * vpmaddwd (%rax), %xmm1, %xmm2
1480+
# CHECK-NEXT: 1 5 0.50 vpmaddubsw %xmm0, %xmm1, %xmm2
1481+
# CHECK-NEXT: 2 11 0.50 * vpmaddubsw (%rax), %xmm1, %xmm2
1482+
# CHECK-NEXT: 1 5 0.50 vpmaddwd %xmm0, %xmm1, %xmm2
1483+
# CHECK-NEXT: 2 11 0.50 * vpmaddwd (%rax), %xmm1, %xmm2
14841484
# CHECK-NEXT: 1 1 0.50 vpmaxsb %xmm0, %xmm1, %xmm2
14851485
# CHECK-NEXT: 2 7 0.50 * vpmaxsb (%rax), %xmm1, %xmm2
14861486
# CHECK-NEXT: 1 1 0.50 vpmaxsd %xmm0, %xmm1, %xmm2
@@ -1530,20 +1530,20 @@ vzeroupper
15301530
# CHECK-NEXT: 2 6 1.00 * vpmovzxwd (%rax), %xmm2
15311531
# CHECK-NEXT: 1 1 1.00 vpmovzxwq %xmm0, %xmm2
15321532
# CHECK-NEXT: 2 6 1.00 * vpmovzxwq (%rax), %xmm2
1533-
# CHECK-NEXT: 1 4 0.50 vpmuldq %xmm0, %xmm1, %xmm2
1534-
# CHECK-NEXT: 2 10 0.50 * vpmuldq (%rax), %xmm1, %xmm2
1535-
# CHECK-NEXT: 1 4 0.50 vpmulhrsw %xmm0, %xmm1, %xmm2
1536-
# CHECK-NEXT: 2 10 0.50 * vpmulhrsw (%rax), %xmm1, %xmm2
1537-
# CHECK-NEXT: 1 4 0.50 vpmulhuw %xmm0, %xmm1, %xmm2
1538-
# CHECK-NEXT: 2 10 0.50 * vpmulhuw (%rax), %xmm1, %xmm2
1539-
# CHECK-NEXT: 1 4 0.50 vpmulhw %xmm0, %xmm1, %xmm2
1540-
# CHECK-NEXT: 2 10 0.50 * vpmulhw (%rax), %xmm1, %xmm2
1533+
# CHECK-NEXT: 1 5 0.50 vpmuldq %xmm0, %xmm1, %xmm2
1534+
# CHECK-NEXT: 2 11 0.50 * vpmuldq (%rax), %xmm1, %xmm2
1535+
# CHECK-NEXT: 1 5 0.50 vpmulhrsw %xmm0, %xmm1, %xmm2
1536+
# CHECK-NEXT: 2 11 0.50 * vpmulhrsw (%rax), %xmm1, %xmm2
1537+
# CHECK-NEXT: 1 5 0.50 vpmulhuw %xmm0, %xmm1, %xmm2
1538+
# CHECK-NEXT: 2 11 0.50 * vpmulhuw (%rax), %xmm1, %xmm2
1539+
# CHECK-NEXT: 1 5 0.50 vpmulhw %xmm0, %xmm1, %xmm2
1540+
# CHECK-NEXT: 2 11 0.50 * vpmulhw (%rax), %xmm1, %xmm2
15411541
# CHECK-NEXT: 2 10 1.00 vpmulld %xmm0, %xmm1, %xmm2
15421542
# CHECK-NEXT: 3 16 1.00 * vpmulld (%rax), %xmm1, %xmm2
1543-
# CHECK-NEXT: 1 4 0.50 vpmullw %xmm0, %xmm1, %xmm2
1544-
# CHECK-NEXT: 2 10 0.50 * vpmullw (%rax), %xmm1, %xmm2
1545-
# CHECK-NEXT: 1 4 0.50 vpmuludq %xmm0, %xmm1, %xmm2
1546-
# CHECK-NEXT: 2 10 0.50 * vpmuludq (%rax), %xmm1, %xmm2
1543+
# CHECK-NEXT: 1 5 0.50 vpmullw %xmm0, %xmm1, %xmm2
1544+
# CHECK-NEXT: 2 11 0.50 * vpmullw (%rax), %xmm1, %xmm2
1545+
# CHECK-NEXT: 1 5 0.50 vpmuludq %xmm0, %xmm1, %xmm2
1546+
# CHECK-NEXT: 2 11 0.50 * vpmuludq (%rax), %xmm1, %xmm2
15471547
# CHECK-NEXT: 1 1 0.33 vpor %xmm0, %xmm1, %xmm2
15481548
# CHECK-NEXT: 2 7 0.50 * vpor (%rax), %xmm1, %xmm2
15491549
# CHECK-NEXT: 1 3 1.00 vpsadbw %xmm0, %xmm1, %xmm2

llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx2.s

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -588,10 +588,10 @@ vpxor (%rax), %ymm1, %ymm2
588588
# CHECK-NEXT: 4 10 2.00 * vphsubsw (%rax), %ymm1, %ymm2
589589
# CHECK-NEXT: 3 3 2.00 vphsubw %ymm0, %ymm1, %ymm2
590590
# CHECK-NEXT: 4 10 2.00 * vphsubw (%rax), %ymm1, %ymm2
591-
# CHECK-NEXT: 1 4 0.50 vpmaddubsw %ymm0, %ymm1, %ymm2
592-
# CHECK-NEXT: 2 11 0.50 * vpmaddubsw (%rax), %ymm1, %ymm2
593-
# CHECK-NEXT: 1 4 0.50 vpmaddwd %ymm0, %ymm1, %ymm2
594-
# CHECK-NEXT: 2 11 0.50 * vpmaddwd (%rax), %ymm1, %ymm2
591+
# CHECK-NEXT: 1 5 0.50 vpmaddubsw %ymm0, %ymm1, %ymm2
592+
# CHECK-NEXT: 2 12 0.50 * vpmaddubsw (%rax), %ymm1, %ymm2
593+
# CHECK-NEXT: 1 5 0.50 vpmaddwd %ymm0, %ymm1, %ymm2
594+
# CHECK-NEXT: 2 12 0.50 * vpmaddwd (%rax), %ymm1, %ymm2
595595
# CHECK-NEXT: 2 7 0.50 * vpmaskmovd (%rax), %xmm0, %xmm2
596596
# CHECK-NEXT: 2 8 0.50 * vpmaskmovd (%rax), %ymm0, %ymm2
597597
# CHECK-NEXT: 2 2 1.00 * * vpmaskmovd %xmm0, %xmm1, (%rax)
@@ -649,20 +649,20 @@ vpxor (%rax), %ymm1, %ymm2
649649
# CHECK-NEXT: 2 9 1.00 * vpmovzxwd (%rax), %ymm2
650650
# CHECK-NEXT: 1 3 1.00 vpmovzxwq %xmm0, %ymm2
651651
# CHECK-NEXT: 2 10 1.00 * vpmovzxwq (%rax), %ymm2
652-
# CHECK-NEXT: 1 4 0.50 vpmuldq %ymm0, %ymm1, %ymm2
653-
# CHECK-NEXT: 2 11 0.50 * vpmuldq (%rax), %ymm1, %ymm2
654-
# CHECK-NEXT: 1 4 0.50 vpmulhrsw %ymm0, %ymm1, %ymm2
655-
# CHECK-NEXT: 2 11 0.50 * vpmulhrsw (%rax), %ymm1, %ymm2
656-
# CHECK-NEXT: 1 4 0.50 vpmulhuw %ymm0, %ymm1, %ymm2
657-
# CHECK-NEXT: 2 11 0.50 * vpmulhuw (%rax), %ymm1, %ymm2
658-
# CHECK-NEXT: 1 4 0.50 vpmulhw %ymm0, %ymm1, %ymm2
659-
# CHECK-NEXT: 2 11 0.50 * vpmulhw (%rax), %ymm1, %ymm2
652+
# CHECK-NEXT: 1 5 0.50 vpmuldq %ymm0, %ymm1, %ymm2
653+
# CHECK-NEXT: 2 12 0.50 * vpmuldq (%rax), %ymm1, %ymm2
654+
# CHECK-NEXT: 1 5 0.50 vpmulhrsw %ymm0, %ymm1, %ymm2
655+
# CHECK-NEXT: 2 12 0.50 * vpmulhrsw (%rax), %ymm1, %ymm2
656+
# CHECK-NEXT: 1 5 0.50 vpmulhuw %ymm0, %ymm1, %ymm2
657+
# CHECK-NEXT: 2 12 0.50 * vpmulhuw (%rax), %ymm1, %ymm2
658+
# CHECK-NEXT: 1 5 0.50 vpmulhw %ymm0, %ymm1, %ymm2
659+
# CHECK-NEXT: 2 12 0.50 * vpmulhw (%rax), %ymm1, %ymm2
660660
# CHECK-NEXT: 2 10 1.00 vpmulld %ymm0, %ymm1, %ymm2
661661
# CHECK-NEXT: 3 17 1.00 * vpmulld (%rax), %ymm1, %ymm2
662-
# CHECK-NEXT: 1 4 0.50 vpmullw %ymm0, %ymm1, %ymm2
663-
# CHECK-NEXT: 2 11 0.50 * vpmullw (%rax), %ymm1, %ymm2
664-
# CHECK-NEXT: 1 4 0.50 vpmuludq %ymm0, %ymm1, %ymm2
665-
# CHECK-NEXT: 2 11 0.50 * vpmuludq (%rax), %ymm1, %ymm2
662+
# CHECK-NEXT: 1 5 0.50 vpmullw %ymm0, %ymm1, %ymm2
663+
# CHECK-NEXT: 2 12 0.50 * vpmullw (%rax), %ymm1, %ymm2
664+
# CHECK-NEXT: 1 5 0.50 vpmuludq %ymm0, %ymm1, %ymm2
665+
# CHECK-NEXT: 2 12 0.50 * vpmuludq (%rax), %ymm1, %ymm2
666666
# CHECK-NEXT: 1 1 0.33 vpor %ymm0, %ymm1, %ymm2
667667
# CHECK-NEXT: 2 8 0.50 * vpor (%rax), %ymm1, %ymm2
668668
# CHECK-NEXT: 1 3 1.00 vpsadbw %ymm0, %ymm1, %ymm2

llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-mmx.s

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -209,12 +209,12 @@ pxor (%rax), %mm2
209209
# CHECK-NEXT: 2 6 1.00 * pcmpgtd (%rax), %mm2
210210
# CHECK-NEXT: 1 1 1.00 pcmpgtw %mm0, %mm2
211211
# CHECK-NEXT: 2 6 1.00 * pcmpgtw (%rax), %mm2
212-
# CHECK-NEXT: 1 4 1.00 pmaddwd %mm0, %mm2
213-
# CHECK-NEXT: 2 9 1.00 * pmaddwd (%rax), %mm2
214-
# CHECK-NEXT: 1 4 1.00 pmulhw %mm0, %mm2
215-
# CHECK-NEXT: 2 9 1.00 * pmulhw (%rax), %mm2
216-
# CHECK-NEXT: 1 4 1.00 pmullw %mm0, %mm2
217-
# CHECK-NEXT: 2 9 1.00 * pmullw (%rax), %mm2
212+
# CHECK-NEXT: 1 5 1.00 pmaddwd %mm0, %mm2
213+
# CHECK-NEXT: 2 10 1.00 * pmaddwd (%rax), %mm2
214+
# CHECK-NEXT: 1 5 1.00 pmulhw %mm0, %mm2
215+
# CHECK-NEXT: 2 10 1.00 * pmulhw (%rax), %mm2
216+
# CHECK-NEXT: 1 5 1.00 pmullw %mm0, %mm2
217+
# CHECK-NEXT: 2 10 1.00 * pmullw (%rax), %mm2
218218
# CHECK-NEXT: 1 1 0.50 por %mm0, %mm2
219219
# CHECK-NEXT: 2 6 0.50 * por (%rax), %mm2
220220
# CHECK-NEXT: 1 1 1.00 pslld $1, %mm2

llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,8 +280,8 @@ xorps (%rax), %xmm2
280280
# CHECK-NEXT: 1 1 1.00 pminub %mm0, %mm2
281281
# CHECK-NEXT: 2 6 1.00 * pminub (%rax), %mm2
282282
# CHECK-NEXT: 1 2 1.00 pmovmskb %mm0, %ecx
283-
# CHECK-NEXT: 1 4 1.00 pmulhuw %mm0, %mm2
284-
# CHECK-NEXT: 2 9 1.00 * pmulhuw (%rax), %mm2
283+
# CHECK-NEXT: 1 5 1.00 pmulhuw %mm0, %mm2
284+
# CHECK-NEXT: 2 10 1.00 * pmulhuw (%rax), %mm2
285285
# CHECK-NEXT: 1 5 0.50 * * prefetcht0 (%rax)
286286
# CHECK-NEXT: 1 5 0.50 * * prefetcht1 (%rax)
287287
# CHECK-NEXT: 1 5 0.50 * * prefetcht2 (%rax)

llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -563,8 +563,8 @@ xorpd (%rax), %xmm2
563563
# CHECK-NEXT: 2 3 1.00 pextrw $1, %xmm0, %ecx
564564
# CHECK-NEXT: 2 2 2.00 pinsrw $1, %eax, %xmm0
565565
# CHECK-NEXT: 2 6 1.00 * pinsrw $1, (%rax), %xmm0
566-
# CHECK-NEXT: 1 4 0.50 pmaddwd %xmm0, %xmm2
567-
# CHECK-NEXT: 2 10 0.50 * pmaddwd (%rax), %xmm2
566+
# CHECK-NEXT: 1 5 0.50 pmaddwd %xmm0, %xmm2
567+
# CHECK-NEXT: 2 11 0.50 * pmaddwd (%rax), %xmm2
568568
# CHECK-NEXT: 1 1 0.50 pmaxsw %xmm0, %xmm2
569569
# CHECK-NEXT: 2 7 0.50 * pmaxsw (%rax), %xmm2
570570
# CHECK-NEXT: 1 1 0.50 pmaxub %xmm0, %xmm2
@@ -574,16 +574,16 @@ xorpd (%rax), %xmm2
574574
# CHECK-NEXT: 1 1 0.50 pminub %xmm0, %xmm2
575575
# CHECK-NEXT: 2 7 0.50 * pminub (%rax), %xmm2
576576
# CHECK-NEXT: 1 2 1.00 pmovmskb %xmm0, %ecx
577-
# CHECK-NEXT: 1 4 0.50 pmulhuw %xmm0, %xmm2
578-
# CHECK-NEXT: 2 10 0.50 * pmulhuw (%rax), %xmm2
579-
# CHECK-NEXT: 1 4 0.50 pmulhw %xmm0, %xmm2
580-
# CHECK-NEXT: 2 10 0.50 * pmulhw (%rax), %xmm2
581-
# CHECK-NEXT: 1 4 0.50 pmullw %xmm0, %xmm2
582-
# CHECK-NEXT: 2 10 0.50 * pmullw (%rax), %xmm2
583-
# CHECK-NEXT: 1 4 1.00 pmuludq %mm0, %mm2
584-
# CHECK-NEXT: 2 9 1.00 * pmuludq (%rax), %mm2
585-
# CHECK-NEXT: 1 4 0.50 pmuludq %xmm0, %xmm2
586-
# CHECK-NEXT: 2 10 0.50 * pmuludq (%rax), %xmm2
577+
# CHECK-NEXT: 1 5 0.50 pmulhuw %xmm0, %xmm2
578+
# CHECK-NEXT: 2 11 0.50 * pmulhuw (%rax), %xmm2
579+
# CHECK-NEXT: 1 5 0.50 pmulhw %xmm0, %xmm2
580+
# CHECK-NEXT: 2 11 0.50 * pmulhw (%rax), %xmm2
581+
# CHECK-NEXT: 1 5 0.50 pmullw %xmm0, %xmm2
582+
# CHECK-NEXT: 2 11 0.50 * pmullw (%rax), %xmm2
583+
# CHECK-NEXT: 1 5 1.00 pmuludq %mm0, %mm2
584+
# CHECK-NEXT: 2 10 1.00 * pmuludq (%rax), %mm2
585+
# CHECK-NEXT: 1 5 0.50 pmuludq %xmm0, %xmm2
586+
# CHECK-NEXT: 2 11 0.50 * pmuludq (%rax), %xmm2
587587
# CHECK-NEXT: 1 1 0.33 por %xmm0, %xmm2
588588
# CHECK-NEXT: 2 7 0.50 * por (%rax), %xmm2
589589
# CHECK-NEXT: 1 3 1.00 psadbw %xmm0, %xmm2

llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse41.s

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,8 +237,8 @@ roundss $1, (%rax), %xmm2
237237
# CHECK-NEXT: 2 6 1.00 * pmovzxwd (%rax), %xmm2
238238
# CHECK-NEXT: 1 1 1.00 pmovzxwq %xmm0, %xmm2
239239
# CHECK-NEXT: 2 6 1.00 * pmovzxwq (%rax), %xmm2
240-
# CHECK-NEXT: 1 4 0.50 pmuldq %xmm0, %xmm2
241-
# CHECK-NEXT: 2 10 0.50 * pmuldq (%rax), %xmm2
240+
# CHECK-NEXT: 1 5 0.50 pmuldq %xmm0, %xmm2
241+
# CHECK-NEXT: 2 11 0.50 * pmuldq (%rax), %xmm2
242242
# CHECK-NEXT: 2 10 1.00 pmulld %xmm0, %xmm2
243243
# CHECK-NEXT: 3 16 1.00 * pmulld (%rax), %xmm2
244244
# CHECK-NEXT: 2 3 1.00 ptest %xmm0, %xmm1

llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-ssse3.s

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -146,14 +146,14 @@ psignw (%rax), %xmm2
146146
# CHECK-NEXT: 4 8 2.00 * phsubw (%rax), %mm2
147147
# CHECK-NEXT: 3 3 2.00 phsubw %xmm0, %xmm2
148148
# CHECK-NEXT: 4 9 2.00 * phsubw (%rax), %xmm2
149-
# CHECK-NEXT: 1 4 1.00 pmaddubsw %mm0, %mm2
150-
# CHECK-NEXT: 2 9 1.00 * pmaddubsw (%rax), %mm2
151-
# CHECK-NEXT: 1 4 0.50 pmaddubsw %xmm0, %xmm2
152-
# CHECK-NEXT: 2 10 0.50 * pmaddubsw (%rax), %xmm2
153-
# CHECK-NEXT: 1 4 1.00 pmulhrsw %mm0, %mm2
154-
# CHECK-NEXT: 2 9 1.00 * pmulhrsw (%rax), %mm2
155-
# CHECK-NEXT: 1 4 0.50 pmulhrsw %xmm0, %xmm2
156-
# CHECK-NEXT: 2 10 0.50 * pmulhrsw (%rax), %xmm2
149+
# CHECK-NEXT: 1 5 1.00 pmaddubsw %mm0, %mm2
150+
# CHECK-NEXT: 2 10 1.00 * pmaddubsw (%rax), %mm2
151+
# CHECK-NEXT: 1 5 0.50 pmaddubsw %xmm0, %xmm2
152+
# CHECK-NEXT: 2 11 0.50 * pmaddubsw (%rax), %xmm2
153+
# CHECK-NEXT: 1 5 1.00 pmulhrsw %mm0, %mm2
154+
# CHECK-NEXT: 2 10 1.00 * pmulhrsw (%rax), %mm2
155+
# CHECK-NEXT: 1 5 0.50 pmulhrsw %xmm0, %xmm2
156+
# CHECK-NEXT: 2 11 0.50 * pmulhrsw (%rax), %xmm2
157157
# CHECK-NEXT: 1 1 1.00 pshufb %mm0, %mm2
158158
# CHECK-NEXT: 2 6 1.00 * pshufb (%rax), %mm2
159159
# CHECK-NEXT: 1 1 1.00 pshufb %xmm0, %xmm2

0 commit comments

Comments
 (0)