Skip to content

Commit 02d5680

Browse files
authored
[X86] Support APX promoted RAO-INT and MOVBE instructions (#77431)
R16-R31 were added to the GPRs in #70958. This patch supports the promoted RAO-INT and MOVBE instructions in EVEX space. RFC: https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4
1 parent d9d1ae6 commit 02d5680

File tree

14 files changed

+754
-70
lines changed

14 files changed

+754
-70
lines changed

llvm/lib/Target/X86/X86InstrMisc.td

Lines changed: 38 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ def PUSHA16 : I<0x60, RawFrm, (outs), (ins), "pushaw", []>,
229229
OpSize16, Requires<[Not64BitMode]>;
230230
}
231231

232-
let Constraints = "$src = $dst", SchedRW = [WriteBSWAP32] in {
232+
let Constraints = "$src = $dst", SchedRW = [WriteBSWAP32], Predicates = [NoNDD_Or_NoMOVBE] in {
233233
// This instruction is a consequence of BSWAP32r observing operand size. The
234234
// encoding is valid, but the behavior is undefined.
235235
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
@@ -1090,35 +1090,43 @@ def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
10901090
//===----------------------------------------------------------------------===//
10911091
// MOVBE Instructions
10921092
//
1093-
let Predicates = [HasMOVBE] in {
1094-
let SchedRW = [WriteALULd] in {
1095-
def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
1096-
"movbe{w}\t{$src, $dst|$dst, $src}",
1097-
[(set GR16:$dst, (bswap (loadi16 addr:$src)))]>,
1098-
OpSize16, T8;
1099-
def MOVBE32rm : I<0xF0, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
1100-
"movbe{l}\t{$src, $dst|$dst, $src}",
1101-
[(set GR32:$dst, (bswap (loadi32 addr:$src)))]>,
1102-
OpSize32, T8;
1103-
def MOVBE64rm : RI<0xF0, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
1104-
"movbe{q}\t{$src, $dst|$dst, $src}",
1105-
[(set GR64:$dst, (bswap (loadi64 addr:$src)))]>,
1106-
T8;
1107-
}
1108-
let SchedRW = [WriteStore] in {
1109-
def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
1110-
"movbe{w}\t{$src, $dst|$dst, $src}",
1111-
[(store (bswap GR16:$src), addr:$dst)]>,
1112-
OpSize16, T8;
1113-
def MOVBE32mr : I<0xF1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
1114-
"movbe{l}\t{$src, $dst|$dst, $src}",
1115-
[(store (bswap GR32:$src), addr:$dst)]>,
1116-
OpSize32, T8;
1117-
def MOVBE64mr : RI<0xF1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
1118-
"movbe{q}\t{$src, $dst|$dst, $src}",
1119-
[(store (bswap GR64:$src), addr:$dst)]>,
1120-
T8;
1121-
}
1093+
multiclass Movbe<bits<8> o, X86TypeInfo t, string suffix = ""> {
1094+
def rm#suffix : ITy<o, MRMSrcMem, t, (outs t.RegClass:$dst),
1095+
(ins t.MemOperand:$src1), "movbe", unaryop_ndd_args,
1096+
[(set t.RegClass:$dst, (bswap (t.LoadNode addr:$src1)))]>,
1097+
Sched<[WriteALULd]>;
1098+
def mr#suffix : ITy<!add(o, 1), MRMDestMem, t, (outs),
1099+
(ins t.MemOperand:$dst, t.RegClass:$src1),
1100+
"movbe", unaryop_ndd_args,
1101+
[(store (bswap t.RegClass:$src1), addr:$dst)]>,
1102+
Sched<[WriteStore]>;
1103+
}
1104+
1105+
let Predicates = [HasMOVBE, NoEGPR] in {
1106+
defm MOVBE16 : Movbe<0xF0, Xi16>, OpSize16, T8;
1107+
defm MOVBE32 : Movbe<0xF0, Xi32>, OpSize32, T8;
1108+
defm MOVBE64 : Movbe<0xF0, Xi64>, T8;
1109+
}
1110+
1111+
let Predicates = [HasMOVBE, HasEGPR, In64BitMode] in {
1112+
defm MOVBE16 : Movbe<0x60, Xi16, "_EVEX">, EVEX, T_MAP4, PD;
1113+
defm MOVBE32 : Movbe<0x60, Xi32, "_EVEX">, EVEX, T_MAP4;
1114+
defm MOVBE64 : Movbe<0x60, Xi64, "_EVEX">, EVEX, T_MAP4;
1115+
}
1116+
1117+
multiclass Movberr<X86TypeInfo t> {
1118+
def rr : ITy<0x61, MRMDestReg, t, (outs t.RegClass:$dst),
1119+
(ins t.RegClass:$src1), "movbe", unaryop_ndd_args,
1120+
[(set t.RegClass:$dst, (bswap t.RegClass:$src1))]>,
1121+
EVEX, T_MAP4;
1122+
def rr_REV : ITy<0x60, MRMSrcReg, t, (outs t.RegClass:$dst),
1123+
(ins t.RegClass:$src1), "movbe", unaryop_ndd_args, []>,
1124+
EVEX, T_MAP4, DisassembleOnly;
1125+
}
1126+
let SchedRW = [WriteALU], Predicates = [HasMOVBE, HasNDD, In64BitMode] in {
1127+
defm MOVBE16 : Movberr<Xi16>, PD;
1128+
defm MOVBE32 : Movberr<Xi32>;
1129+
defm MOVBE64 : Movberr<Xi64>;
11221130
}
11231131

11241132
//===----------------------------------------------------------------------===//

llvm/lib/Target/X86/X86InstrPredicates.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ def HasTBM : Predicate<"Subtarget->hasTBM()">;
122122
def NoTBM : Predicate<"!Subtarget->hasTBM()">;
123123
def HasLWP : Predicate<"Subtarget->hasLWP()">;
124124
def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">;
125+
def NoNDD_Or_NoMOVBE : Predicate<"!Subtarget->hasNDD() || !Subtarget->hasMOVBE()">;
125126
def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">;
126127
def HasF16C : Predicate<"Subtarget->hasF16C()">;
127128
def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">;

llvm/lib/Target/X86/X86InstrRAOINT.td

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,21 +25,23 @@ def X86rao_xor : SDNode<"X86ISD::AXOR", SDTRAOBinaryArith,
2525
def X86rao_and : SDNode<"X86ISD::AAND", SDTRAOBinaryArith,
2626
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
2727

28-
multiclass RAOINT_BASE<string OpcodeStr> {
29-
let Predicates = [HasRAOINT] in
30-
def 32mr : I<0xfc, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
31-
!strconcat("a", OpcodeStr, "{l}\t{$src, $dst|$dst, $src}"),
32-
[(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR32:$src)]>,
33-
Sched<[WriteALURMW]>;
28+
multiclass RaoInt<string m, string suffix = ""> {
29+
let Pattern = [(!cast<SDNode>("X86rao_" # m) addr:$src1, GR32:$src2)] in
30+
def 32mr#suffix : BinOpMR_M<0xfc, "a" # m, Xi32>;
31+
let Pattern = [(!cast<SDNode>("X86rao_" # m) addr:$src1, GR64:$src2)] in
32+
def 64mr#suffix : BinOpMR_M<0xfc, "a" # m, Xi64>;
33+
}
3434

35-
let Predicates = [HasRAOINT, In64BitMode] in
36-
def 64mr : I<0xfc, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
37-
!strconcat("a", OpcodeStr, "{q}\t{$src, $dst|$dst, $src}"),
38-
[(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR64:$src)]>,
39-
Sched<[WriteALURMW]>, REX_W;
35+
let Predicates = [HasRAOINT, NoEGPR] in {
36+
defm AADD : RaoInt<"add">, T8;
37+
defm AAND : RaoInt<"and">, T8, PD;
38+
defm AOR : RaoInt<"or" >, T8, XD;
39+
defm AXOR : RaoInt<"xor">, T8, XS;
4040
}
4141

42-
defm AADD : RAOINT_BASE<"add">, T8;
43-
defm AAND : RAOINT_BASE<"and">, T8, PD;
44-
defm AOR : RAOINT_BASE<"or" >, T8, XD;
45-
defm AXOR : RAOINT_BASE<"xor">, T8, XS;
42+
let Predicates = [HasRAOINT, HasEGPR, In64BitMode] in {
43+
defm AADD : RaoInt<"add", "_EVEX">, EVEX, T_MAP4;
44+
defm AAND : RaoInt<"and", "_EVEX">, EVEX, T_MAP4, PD;
45+
defm AOR : RaoInt<"or", "_EVEX">, EVEX, T_MAP4, XD;
46+
defm AXOR : RaoInt<"xor", "_EVEX">, EVEX, T_MAP4, XS;
47+
}

llvm/test/CodeGen/X86/movbe.ll

Lines changed: 189 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,66 +1,230 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
12
; RUN: llc -mtriple=x86_64-linux -mcpu=atom < %s | FileCheck %s
23
; RUN: llc -mtriple=x86_64-linux -mcpu=slm < %s | FileCheck %s -check-prefix=SLM
3-
4+
; RUN: llc -mtriple=x86_64-linux -mattr=+egpr,+ndd,+movbe --show-mc-encoding < %s | FileCheck %s -check-prefix=EGPR
5+
; RUN: llc -mtriple=x86_64-linux -mattr=+egpr,+ndd --show-mc-encoding < %s | FileCheck %s -check-prefix=NOMOVBE
46
declare i16 @llvm.bswap.i16(i16) nounwind readnone
57
declare i32 @llvm.bswap.i32(i32) nounwind readnone
68
declare i64 @llvm.bswap.i64(i64) nounwind readnone
79

810
define void @test1(ptr nocapture %x, i16 %y) nounwind {
11+
; CHECK-LABEL: test1:
12+
; CHECK: # %bb.0:
13+
; CHECK-NEXT: movbew %si, (%rdi)
14+
; CHECK-NEXT: nop
15+
; CHECK-NEXT: nop
16+
; CHECK-NEXT: nop
17+
; CHECK-NEXT: nop
18+
; CHECK-NEXT: nop
19+
; CHECK-NEXT: nop
20+
; CHECK-NEXT: retq
21+
;
22+
; SLM-LABEL: test1:
23+
; SLM: # %bb.0:
24+
; SLM-NEXT: movbew %si, (%rdi)
25+
; SLM-NEXT: retq
26+
;
27+
; EGPR-LABEL: test1:
28+
; EGPR: # %bb.0:
29+
; EGPR-NEXT: movbew %si, (%rdi) # EVEX TO LEGACY Compression encoding: [0x66,0x0f,0x38,0xf1,0x37]
30+
; EGPR-NEXT: retq # encoding: [0xc3]
31+
;
32+
; NOMOVBE-LABEL: test1:
33+
; NOMOVBE: # %bb.0:
34+
; NOMOVBE-NEXT: rolw $8, %si, %ax # encoding: [0x62,0xf4,0x7d,0x18,0xc1,0xc6,0x08]
35+
; NOMOVBE-NEXT: movw %ax, (%rdi) # encoding: [0x66,0x89,0x07]
36+
; NOMOVBE-NEXT: retq # encoding: [0xc3]
937
%bswap = call i16 @llvm.bswap.i16(i16 %y)
1038
store i16 %bswap, ptr %x, align 2
1139
ret void
12-
; CHECK-LABEL: test1:
13-
; CHECK: movbew %si, (%rdi)
14-
; SLM-LABEL: test1:
15-
; SLM: movbew %si, (%rdi)
1640
}
1741

1842
define i16 @test2(ptr %x) nounwind {
43+
; CHECK-LABEL: test2:
44+
; CHECK: # %bb.0:
45+
; CHECK-NEXT: movbew (%rdi), %ax
46+
; CHECK-NEXT: nop
47+
; CHECK-NEXT: nop
48+
; CHECK-NEXT: nop
49+
; CHECK-NEXT: nop
50+
; CHECK-NEXT: nop
51+
; CHECK-NEXT: nop
52+
; CHECK-NEXT: retq
53+
;
54+
; SLM-LABEL: test2:
55+
; SLM: # %bb.0:
56+
; SLM-NEXT: movbew (%rdi), %ax
57+
; SLM-NEXT: retq
58+
;
59+
; EGPR-LABEL: test2:
60+
; EGPR: # %bb.0:
61+
; EGPR-NEXT: movbew (%rdi), %ax # EVEX TO LEGACY Compression encoding: [0x66,0x0f,0x38,0xf0,0x07]
62+
; EGPR-NEXT: retq # encoding: [0xc3]
63+
;
64+
; NOMOVBE-LABEL: test2:
65+
; NOMOVBE: # %bb.0:
66+
; NOMOVBE-NEXT: rolw $8, (%rdi), %ax # encoding: [0x62,0xf4,0x7d,0x18,0xc1,0x07,0x08]
67+
; NOMOVBE-NEXT: retq # encoding: [0xc3]
1968
%load = load i16, ptr %x, align 2
2069
%bswap = call i16 @llvm.bswap.i16(i16 %load)
2170
ret i16 %bswap
22-
; CHECK-LABEL: test2:
23-
; CHECK: movbew (%rdi), %ax
24-
; SLM-LABEL: test2:
25-
; SLM: movbew (%rdi), %ax
2671
}
2772

2873
define void @test3(ptr nocapture %x, i32 %y) nounwind {
74+
; CHECK-LABEL: test3:
75+
; CHECK: # %bb.0:
76+
; CHECK-NEXT: movbel %esi, (%rdi)
77+
; CHECK-NEXT: nop
78+
; CHECK-NEXT: nop
79+
; CHECK-NEXT: nop
80+
; CHECK-NEXT: nop
81+
; CHECK-NEXT: nop
82+
; CHECK-NEXT: nop
83+
; CHECK-NEXT: retq
84+
;
85+
; SLM-LABEL: test3:
86+
; SLM: # %bb.0:
87+
; SLM-NEXT: movbel %esi, (%rdi)
88+
; SLM-NEXT: retq
89+
;
90+
; EGPR-LABEL: test3:
91+
; EGPR: # %bb.0:
92+
; EGPR-NEXT: movbel %esi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xf1,0x37]
93+
; EGPR-NEXT: retq # encoding: [0xc3]
94+
;
95+
; NOMOVBE-LABEL: test3:
96+
; NOMOVBE: # %bb.0:
97+
; NOMOVBE-NEXT: bswapl %esi # encoding: [0x0f,0xce]
98+
; NOMOVBE-NEXT: movl %esi, (%rdi) # encoding: [0x89,0x37]
99+
; NOMOVBE-NEXT: retq # encoding: [0xc3]
29100
%bswap = call i32 @llvm.bswap.i32(i32 %y)
30101
store i32 %bswap, ptr %x, align 4
31102
ret void
32-
; CHECK-LABEL: test3:
33-
; CHECK: movbel %esi, (%rdi)
34-
; SLM-LABEL: test3:
35-
; SLM: movbel %esi, (%rdi)
36103
}
37104

38105
define i32 @test4(ptr %x) nounwind {
106+
; CHECK-LABEL: test4:
107+
; CHECK: # %bb.0:
108+
; CHECK-NEXT: movbel (%rdi), %eax
109+
; CHECK-NEXT: nop
110+
; CHECK-NEXT: nop
111+
; CHECK-NEXT: nop
112+
; CHECK-NEXT: nop
113+
; CHECK-NEXT: nop
114+
; CHECK-NEXT: nop
115+
; CHECK-NEXT: retq
116+
;
117+
; SLM-LABEL: test4:
118+
; SLM: # %bb.0:
119+
; SLM-NEXT: movbel (%rdi), %eax
120+
; SLM-NEXT: retq
121+
;
122+
; EGPR-LABEL: test4:
123+
; EGPR: # %bb.0:
124+
; EGPR-NEXT: movbel (%rdi), %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xf0,0x07]
125+
; EGPR-NEXT: retq # encoding: [0xc3]
126+
;
127+
; NOMOVBE-LABEL: test4:
128+
; NOMOVBE: # %bb.0:
129+
; NOMOVBE-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07]
130+
; NOMOVBE-NEXT: bswapl %eax # encoding: [0x0f,0xc8]
131+
; NOMOVBE-NEXT: retq # encoding: [0xc3]
39132
%load = load i32, ptr %x, align 4
40133
%bswap = call i32 @llvm.bswap.i32(i32 %load)
41134
ret i32 %bswap
42-
; CHECK-LABEL: test4:
43-
; CHECK: movbel (%rdi), %eax
44-
; SLM-LABEL: test4:
45-
; SLM: movbel (%rdi), %eax
46135
}
47136

48137
define void @test5(ptr %x, i64 %y) nounwind {
138+
; CHECK-LABEL: test5:
139+
; CHECK: # %bb.0:
140+
; CHECK-NEXT: movbeq %rsi, (%rdi)
141+
; CHECK-NEXT: nop
142+
; CHECK-NEXT: nop
143+
; CHECK-NEXT: nop
144+
; CHECK-NEXT: nop
145+
; CHECK-NEXT: nop
146+
; CHECK-NEXT: nop
147+
; CHECK-NEXT: retq
148+
;
149+
; SLM-LABEL: test5:
150+
; SLM: # %bb.0:
151+
; SLM-NEXT: movbeq %rsi, (%rdi)
152+
; SLM-NEXT: retq
153+
;
154+
; EGPR-LABEL: test5:
155+
; EGPR: # %bb.0:
156+
; EGPR-NEXT: movbeq %rsi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x38,0xf1,0x37]
157+
; EGPR-NEXT: retq # encoding: [0xc3]
158+
;
159+
; NOMOVBE-LABEL: test5:
160+
; NOMOVBE: # %bb.0:
161+
; NOMOVBE-NEXT: bswapq %rsi # encoding: [0x48,0x0f,0xce]
162+
; NOMOVBE-NEXT: movq %rsi, (%rdi) # encoding: [0x48,0x89,0x37]
163+
; NOMOVBE-NEXT: retq # encoding: [0xc3]
49164
%bswap = call i64 @llvm.bswap.i64(i64 %y)
50165
store i64 %bswap, ptr %x, align 8
51166
ret void
52-
; CHECK-LABEL: test5:
53-
; CHECK: movbeq %rsi, (%rdi)
54-
; SLM-LABEL: test5:
55-
; SLM: movbeq %rsi, (%rdi)
56167
}
57168

58169
define i64 @test6(ptr %x) nounwind {
170+
; CHECK-LABEL: test6:
171+
; CHECK: # %bb.0:
172+
; CHECK-NEXT: movbeq (%rdi), %rax
173+
; CHECK-NEXT: nop
174+
; CHECK-NEXT: nop
175+
; CHECK-NEXT: nop
176+
; CHECK-NEXT: nop
177+
; CHECK-NEXT: nop
178+
; CHECK-NEXT: nop
179+
; CHECK-NEXT: retq
180+
;
181+
; SLM-LABEL: test6:
182+
; SLM: # %bb.0:
183+
; SLM-NEXT: movbeq (%rdi), %rax
184+
; SLM-NEXT: retq
185+
;
186+
; EGPR-LABEL: test6:
187+
; EGPR: # %bb.0:
188+
; EGPR-NEXT: movbeq (%rdi), %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x38,0xf0,0x07]
189+
; EGPR-NEXT: retq # encoding: [0xc3]
190+
;
191+
; NOMOVBE-LABEL: test6:
192+
; NOMOVBE: # %bb.0:
193+
; NOMOVBE-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07]
194+
; NOMOVBE-NEXT: bswapq %rax # encoding: [0x48,0x0f,0xc8]
195+
; NOMOVBE-NEXT: retq # encoding: [0xc3]
59196
%load = load i64, ptr %x, align 8
60197
%bswap = call i64 @llvm.bswap.i64(i64 %load)
61198
ret i64 %bswap
62-
; CHECK-LABEL: test6:
63-
; CHECK: movbeq (%rdi), %rax
64-
; SLM-LABEL: test6:
65-
; SLM: movbeq (%rdi), %rax
199+
}
200+
201+
define i64 @test7(i64 %x) nounwind {
202+
; CHECK-LABEL: test7:
203+
; CHECK: # %bb.0:
204+
; CHECK-NEXT: movq %rdi, %rax
205+
; CHECK-NEXT: bswapq %rax
206+
; CHECK-NEXT: nop
207+
; CHECK-NEXT: nop
208+
; CHECK-NEXT: nop
209+
; CHECK-NEXT: nop
210+
; CHECK-NEXT: retq
211+
;
212+
; SLM-LABEL: test7:
213+
; SLM: # %bb.0:
214+
; SLM-NEXT: movq %rdi, %rax
215+
; SLM-NEXT: bswapq %rax
216+
; SLM-NEXT: retq
217+
;
218+
; EGPR-LABEL: test7:
219+
; EGPR: # %bb.0:
220+
; EGPR-NEXT: movbeq %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0x61,0xf8]
221+
; EGPR-NEXT: retq # encoding: [0xc3]
222+
;
223+
; NOMOVBE-LABEL: test7:
224+
; NOMOVBE: # %bb.0:
225+
; NOMOVBE-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
226+
; NOMOVBE-NEXT: bswapq %rax # encoding: [0x48,0x0f,0xc8]
227+
; NOMOVBE-NEXT: retq # encoding: [0xc3]
228+
%bswap = call i64 @llvm.bswap.i64(i64 %x)
229+
ret i64 %bswap
66230
}

0 commit comments

Comments
 (0)