Skip to content

Commit 1b86ad2

Browse files
committed
Use 15 byte long nops on modern Intel processors
Back in D42616, we switched our default nop length from 15 to 10 bytes because some platforms suffer painful decode stalls when they encounter multiple instruction prefixes. (The 10-byte limit comes from the fact that nops longer than 8 bytes are formed by padding with prefixes, and some platforms have issues with more than two prefixes.) Based on Agner's guides, it appears that modern Intel processors (SandyBridge and later) can decode an arbitrary number of prefixes without issue. Intel's guide only provides nop encodings up to 9 bytes long; I read that as providing a safe default for all their chips. Older chips and the Atom series have serious decode stalls. I can't find a conclusive reference beyond those two. Differential Revision: https://reviews.llvm.org/D75945
1 parent a26bd4e commit 1b86ad2

File tree

4 files changed

+16
-12
lines changed

4 files changed

+16
-12
lines changed

llvm/lib/Target/X86/X86.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -551,7 +551,8 @@ def ProcessorFeatures {
551551
FeatureSlow3OpsLEA,
552552
FeatureFastScalarFSQRT,
553553
FeatureFastSHLDRotate,
554-
FeatureMergeToThreeWayBranch];
554+
FeatureMergeToThreeWayBranch,
555+
FeatureFast15ByteNOP];
555556
list<SubtargetFeature> SNBSpecificFeatures = [FeatureSlowUAMem32,
556557
FeaturePOPCNTFalseDeps];
557558
list<SubtargetFeature> SNBInheritableFeatures =

llvm/test/CodeGen/X86/align-branch-boundary-suppressions.ll

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,12 @@ define void @patchpoint(i64 %a, i64 %b) {
6969
; CHECK-NEXT: .cfi_def_cfa_register %rbp
7070
; CHECK-NEXT: #noautopadding
7171
; CHECK-NEXT: .Ltmp2:
72+
; CHECK-NEXT: .byte 102
73+
; CHECK-NEXT: .byte 102
74+
; CHECK-NEXT: .byte 102
75+
; CHECK-NEXT: .byte 102
76+
; CHECK-NEXT: .byte 102
7277
; CHECK-NEXT: nopw %cs:512(%rax,%rax)
73-
; CHECK-NEXT: nopl 8(%rax,%rax)
7478
; CHECK-NEXT: #autopadding
7579
; CHECK-NEXT: popq %rbp
7680
; CHECK-NEXT: .cfi_def_cfa %rsp, 8

llvm/test/MC/X86/align-via-relaxation.s

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@ foo:
3232
# that would require a further round of relaxation
3333
# CHECK: <bar>:
3434
# CHECK: 22: eb fe jmp -2 <bar>
35-
# CHECK: 24: 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax)
36-
# CHECK: 2e: 66 90 nop
35+
# CHECK: 24: 66 66 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax)
3736
# CHECK: 30: 0f 0b ud2
3837

3938
bar:
@@ -48,8 +47,8 @@ nobypass:
4847
# CHECK: <loop_preheader>:
4948
# CHECK: 45: 48 85 c0 testq %rax, %rax
5049
# CHECK: 48: 0f 8e 22 00 00 00 jle 34 <loop_exit>
51-
# CHECK: 4e: 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax)
52-
# CHECK: 58: 0f 1f 84 00 00 00 00 00 nopl (%rax,%rax)
50+
# CHECK: 4e: 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax)
51+
# CHECK: 5d: 0f 1f 00 nopl (%rax)
5352
# CHECK: <loop_header>:
5453
# CHECK: 60: 48 83 e8 01 subq $1, %rax
5554
# CHECK: 64: 48 85 c0 testq %rax, %rax

llvm/test/MC/X86/x86_long_nop.s

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@
1717
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver2 | llvm-objdump -d -no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
1818
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=nehalem %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
1919
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=westmere %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
20-
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=sandybridge %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
21-
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=ivybridge %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
22-
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=haswell %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
23-
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=broadwell %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
24-
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=skylake %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
25-
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=skx %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
20+
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=sandybridge %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
21+
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=ivybridge %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
22+
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=haswell %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
23+
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=broadwell %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
24+
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=skylake %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
25+
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=skx %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
2626
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=knl %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
2727
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=knm %s | llvm-objdump -d -no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
2828

0 commit comments

Comments
 (0)