
Commit 653cb8b

Jessica Paquette authored and memfrob committed
[AArch64][GlobalISel] Select XRO addressing mode with wide immediates
Port the wide immediate case from AArch64DAGToDAGISel::SelectAddrModeXRO. If we have a wide immediate which can't be represented in an add, we can end up with code like this:

```
mov x0, imm
add x1, base, x0
ldr x2, [x1, 0]
```

If we use the [base, xN] addressing mode instead, we can produce this:

```
mov x0, imm
ldr x2, [base, x0]
```

This saves 0.4% code size on 7zip at -O3, and gives a geomean code size improvement of 0.1% on CTMark.

Differential Revision: https://reviews.llvm.org/D84784
1 parent 2b2021b commit 653cb8b
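For intuition, here is a minimal standalone sketch (not part of the commit; the helper name `fitsScaledUnsignedOffset` is made up) of the arithmetic that decides whether an offset already fits the scaled unsigned [base + imm] form and therefore does not need the register-offset mode:

```cpp
#include <cassert>
#include <cstdint>

// Sketch of the "fits [base + imm]" test: the offset must be non-negative,
// a multiple of the access size, and within 0x1000 scaled slots of the base.
// fitsScaledUnsignedOffset is a hypothetical name, not an LLVM API.
static bool fitsScaledUnsignedOffset(int64_t ImmOff, unsigned SizeInBytes) {
  unsigned Scale = __builtin_ctz(SizeInBytes); // log2 for power-of-two sizes
  return ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
         ImmOff < (int64_t(0x1000) << Scale);
}

int main() {
  // An 8-byte load at offset 16 fits LDRXui (offset slot 2): no XRO needed.
  assert(fitsScaledUnsignedOffset(16, 8));
  // A wide offset like 4580179968 is far out of range: the register-offset
  // ([base, xN]) form avoids materializing base + imm with an extra add.
  assert(!fitsScaledUnsignedOffset(4580179968, 8));
  return 0;
}
```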

File tree

2 files changed: +263 −4 lines


llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

+52 −4
```diff
@@ -5083,12 +5083,60 @@ InstructionSelector::ComplexRendererFns
 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
                                               unsigned SizeInBytes) const {
   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
-  // If we have a constant offset, then we probably don't want to match a
-  // register offset.
-  if (isBaseWithConstantOffset(Root, MRI))
+  if (!Root.isReg())
+    return None;
+  MachineInstr *PtrAdd =
+      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
+  if (!PtrAdd)
     return None;
 
+  // Check for immediates which cannot be encoded in the [base + imm]
+  // addressing mode, and can't be encoded in an add/sub. If this happens,
+  // we'll end up with code like:
+  //
+  // mov x0, wide
+  // add x1, base, x0
+  // ldr x2, [x1, 0]
+  //
+  // In this situation, we can use the [base, xreg] addressing mode to save an
+  // add/sub:
+  //
+  // mov x0, wide
+  // ldr x2, [base, x0]
+  auto ValAndVReg =
+      getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
+  if (ValAndVReg) {
+    unsigned Scale = Log2_32(SizeInBytes);
+    int64_t ImmOff = ValAndVReg->Value;
+
+    // Skip immediates that can be selected in the load/store addressing
+    // mode.
+    if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
+        ImmOff < (0x1000 << Scale))
+      return None;
+
+    // Helper lambda to decide whether or not it is preferable to emit an add.
+    auto isPreferredADD = [](int64_t ImmOff) {
+      // Constants in [0x0, 0xfff] can be encoded in an add.
+      if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
+        return true;
+
+      // Can it be encoded in an add lsl #12?
+      if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
+        return false;
+
+      // It can be encoded in an add lsl #12, but we may not want to. If it is
+      // possible to select this as a single movz, then prefer that. A single
+      // movz is faster than an add with a shift.
+      return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
+             (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
+    };
+
+    // If the immediate can be encoded in a single add/sub, then bail out.
+    if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
+      return None;
+  }
+
   // Try to fold shifts into the addressing mode.
   auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
   if (AddrModeFns)
```
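As a sanity check on the masks above, here is a small standalone program (an illustration, not part of the commit) that replays the logic of the `isPreferredADD` lambda on the constants exercised by the new tests below:

```cpp
#include <cassert>
#include <cstdint>

// Replays the isPreferredADD lambda from selectAddrModeXRO so the masks can
// be checked in isolation.
static bool isPreferredADD(int64_t ImmOff) {
  // Constants in [0x0, 0xfff] can be encoded in an add.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Anything with bits outside the imm12 << 12 field cannot be an add lsl #12.
  if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
    return false;
  // Encodable as add lsl #12, but a single movz is preferred when possible.
  return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
         (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
}

int main() {
  // 0x111000000 (4580179968): no add/sub encoding at all -> select XRO.
  assert(!isPreferredADD(0x111000000LL) && !isPreferredADD(-0x111000000LL));
  // 0xf000 (61440): fits add lsl #12, but a single movz is cheaper -> XRO.
  assert(!isPreferredADD(0xf000));
  // 0x111000 (1118208): add lsl #12 (ADDXri ..., 273, 12) wins -> skip XRO.
  assert(isPreferredADD(0x111000));
  // 17: a plain add immediate -> skip XRO.
  assert(isPreferredADD(17));
  return 0;
}
```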
New MIR test file

+211 −0

```
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
#
# Test using the xro addressing mode with immediates. This should be done for
# wide constants which are preferably selected using a mov rather than an add.

...
---
name:            use_xro_cannot_encode_add_lsl
alignment:       4
legalized:       true
regBankSelected: true
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $x0
    ; Check that we use the XRO addressing mode when the constant cannot be
    ; represented using an add + lsl.
    ;
    ; cst = 0000000111000000
    ; cst & 000fffffff000000 != 0
    ;
    ; CHECK-LABEL: name: use_xro_cannot_encode_add_lsl
    ; CHECK: liveins: $x0
    ; CHECK: %copy:gpr64sp = COPY $x0
    ; CHECK: %cst:gpr64 = MOVi64imm 4580179968
    ; CHECK: %load:gpr64 = LDRXroX %copy, %cst, 0, 0 :: (volatile load 8)
    ; CHECK: RET_ReallyLR
    %copy:gpr(p0) = COPY $x0
    %cst:gpr(s64) = G_CONSTANT i64 4580179968
    %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
    %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
    RET_ReallyLR

...
---
name:            use_xro_preferred_mov
alignment:       4
legalized:       true
regBankSelected: true
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $x0
    ; Check that we use the XRO addressing mode when the constant can be
    ; represented using a single movz.
    ;
    ; cst = 000000000000f000
    ; cst & 000fffffff000000 == 0
    ; cst & ffffffffffff0fff == 0
    ;
    ; CHECK-LABEL: name: use_xro_preferred_mov
    ; CHECK: liveins: $x0
    ; CHECK: %copy:gpr64sp = COPY $x0
    ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 61440
    ; CHECK: %cst:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
    ; CHECK: %load:gpr64 = LDRXroX %copy, %cst, 0, 0 :: (volatile load 8)
    ; CHECK: RET_ReallyLR
    %copy:gpr(p0) = COPY $x0
    %cst:gpr(s64) = G_CONSTANT i64 61440
    %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
    %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
    RET_ReallyLR

...
---
name:            use_xro_negative_imm
alignment:       4
legalized:       true
regBankSelected: true
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $x0
    ; Check that this works even if we have a negative immediate.
    ;
    ; CHECK-LABEL: name: use_xro_negative_imm
    ; CHECK: liveins: $x0
    ; CHECK: %copy:gpr64sp = COPY $x0
    ; CHECK: %cst:gpr64 = MOVi64imm -61440
    ; CHECK: %load:gpr64 = LDRXroX %copy, %cst, 0, 0 :: (volatile load 8)
    ; CHECK: RET_ReallyLR
    %copy:gpr(p0) = COPY $x0
    %cst:gpr(s64) = G_CONSTANT i64 -61440
    %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
    %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
    RET_ReallyLR

...
---
name:            dont_use_xro_selectable_imm
alignment:       4
legalized:       true
regBankSelected: true
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $x0
    ; Immediates that can be encoded in a LDRXui should be skipped.
    ;
    ; CHECK-LABEL: name: dont_use_xro_selectable_imm
    ; CHECK: liveins: $x0
    ; CHECK: %copy:gpr64sp = COPY $x0
    ; CHECK: %load:gpr64 = LDRXui %copy, 2 :: (volatile load 8)
    ; CHECK: RET_ReallyLR
    %copy:gpr(p0) = COPY $x0
    %cst:gpr(s64) = G_CONSTANT i64 16
    %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
    %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
    RET_ReallyLR

...
---
name:            dont_use_xro_selectable_negative_imm
alignment:       4
legalized:       true
regBankSelected: true
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $x0
    ; Negative immediates that can be encoded in a LDURXi should be skipped.
    ;
    ; CHECK-LABEL: name: dont_use_xro_selectable_negative_imm
    ; CHECK: liveins: $x0
    ; CHECK: %copy:gpr64sp = COPY $x0
    ; CHECK: %load:gpr64 = LDURXi %copy, -16 :: (volatile load 8)
    ; CHECK: RET_ReallyLR
    %copy:gpr(p0) = COPY $x0
    %cst:gpr(s64) = G_CONSTANT i64 -16
    %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
    %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
    RET_ReallyLR

...
---
name:            dont_use_xro_zero
alignment:       4
legalized:       true
regBankSelected: true
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $x0
    ; Immediates that can be encoded in a LDRXui should be skipped.
    ;
    ; CHECK-LABEL: name: dont_use_xro_zero
    ; CHECK: liveins: $x0
    ; CHECK: %copy:gpr64sp = COPY $x0
    ; CHECK: %load:gpr64 = LDRXui %copy, 0 :: (volatile load 8)
    ; CHECK: RET_ReallyLR
    %copy:gpr(p0) = COPY $x0
    %cst:gpr(s64) = G_CONSTANT i64 0
    %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
    %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
    RET_ReallyLR

...
---
name:            dont_use_xro_in_range
alignment:       4
legalized:       true
regBankSelected: true
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $x0
    ; Check that we skip constants which can be encoded in an add.
    ; 17 is in [0x0, 0xfff].
    ;
    ; CHECK-LABEL: name: dont_use_xro_in_range
    ; CHECK: liveins: $x0
    ; CHECK: %copy:gpr64sp = COPY $x0
    ; CHECK: %load:gpr64 = LDURXi %copy, 17 :: (volatile load 8)
    ; CHECK: RET_ReallyLR
    %copy:gpr(p0) = COPY $x0
    %cst:gpr(s64) = G_CONSTANT i64 17
    %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
    %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
    RET_ReallyLR

...
---
name:            dont_use_xro_add_lsl
alignment:       4
legalized:       true
regBankSelected: true
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $x0
    ; Check that we skip when we have an add with an lsl which cannot be
    ; represented as a single movz.
    ;
    ; cst = 0x0000000000111000
    ; cst & 000fffffff000000 == 0
    ; cst & ffffffffff00ffff != 0
    ; cst & ffffffffffff0fff != 0
    ;
    ; CHECK-LABEL: name: dont_use_xro_add_lsl
    ; CHECK: liveins: $x0
    ; CHECK: %copy:gpr64 = COPY $x0
    ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY %copy
    ; CHECK: %addr:gpr64sp = ADDXri [[COPY]], 273, 12
    ; CHECK: %load:gpr64 = LDRXui %addr, 0 :: (volatile load 8)
    ; CHECK: RET_ReallyLR
    %copy:gpr(p0) = COPY $x0
    %cst:gpr(s64) = G_CONSTANT i64 1118208
    %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
    %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
    RET_ReallyLR
```
