10
10
// ===----------------------------------------------------------------------===//
11
11
12
12
#include " llvm/CodeGen/GlobalISel/Localizer.h"
13
+ #include " llvm/Analysis/TargetTransformInfo.h"
13
14
#include " llvm/ADT/DenseMap.h"
14
15
#include " llvm/ADT/SmallPtrSet.h"
15
16
#include " llvm/CodeGen/MachineRegisterInfo.h"
20
21
using namespace llvm ;
21
22
22
23
char Localizer::ID = 0 ;
23
- INITIALIZE_PASS (Localizer, DEBUG_TYPE,
24
- " Move/duplicate certain instructions close to their use" , false ,
25
- false )
24
+ INITIALIZE_PASS_BEGIN (Localizer, DEBUG_TYPE,
25
+ " Move/duplicate certain instructions close to their use" ,
26
+ false , false )
27
+ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
28
+ INITIALIZE_PASS_END(Localizer, DEBUG_TYPE,
29
+ " Move/duplicate certain instructions close to their use" ,
30
+ false , false )
26
31
27
32
/// Construct the pass and make sure it is registered with the global
/// PassRegistry.
Localizer::Localizer() : MachineFunctionPass(ID) {
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeLocalizerPass(Registry);
}
30
35
31
- void Localizer::init (MachineFunction &MF) { MRI = &MF.getRegInfo (); }
36
+ void Localizer::init (MachineFunction &MF) {
37
+ MRI = &MF.getRegInfo ();
38
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI (MF.getFunction ());
39
+ }
32
40
33
41
bool Localizer::shouldLocalize (const MachineInstr &MI) {
42
+ // Assuming a spill and reload of a value has a cost of 1 instruction each,
43
+ // this helper function computes the maximum number of uses we should consider
44
+ // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We
45
+ // break even in terms of code size when the original MI has 2 users vs
46
+ // choosing to potentially spill. With any more than 2 users we have a net code
47
+ // size increase. This doesn't take into account register pressure though.
48
+ auto maxUses = [](unsigned RematCost) {
49
+ // A cost of 1 means remats are basically free.
50
+ if (RematCost == 1 )
51
+ return UINT_MAX;
52
+ if (RematCost == 2 )
53
+ return 2U ;
54
+
55
+ // Remat is too expensive, only sink if there's one user.
56
+ if (RematCost > 2 )
57
+ return 1U ;
58
+ llvm_unreachable (" Unexpected remat cost" );
59
+ };
60
+
61
+ // Helper to walk through uses and terminate if we've reached a limit. Saves
62
+ // us traversing every use when all we need to know is whether there are at most MaxUses of them.
63
+ auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) {
64
+ unsigned NumUses = 0 ;
65
+ auto UI = MRI->use_instr_nodbg_begin (Reg), UE = MRI->use_instr_nodbg_end ();
66
+ for (; UI != UE && NumUses < MaxUses; ++UI) {
67
+ NumUses++;
68
+ }
69
+ // If we haven't reached the end yet then there are more than MaxUses users.
70
+ return UI == UE;
71
+ };
72
+
34
73
switch (MI.getOpcode ()) {
35
74
default :
36
75
return false ;
@@ -40,10 +79,20 @@ bool Localizer::shouldLocalize(const MachineInstr &MI) {
40
79
case TargetOpcode::G_FCONSTANT:
41
80
case TargetOpcode::G_FRAME_INDEX:
42
81
return true ;
82
+ case TargetOpcode::G_GLOBAL_VALUE: {
83
+ unsigned RematCost = TTI->getGISelRematGlobalCost ();
84
+ unsigned Reg = MI.getOperand (0 ).getReg ();
85
+ unsigned MaxUses = maxUses (RematCost);
86
+ if (MaxUses == UINT_MAX)
87
+ return true ; // Remats are "free" so always localize.
88
+ bool B = isUsesAtMost (Reg, MaxUses);
89
+ return B;
90
+ }
43
91
}
44
92
}
45
93
46
94
/// Declare the analyses this pass depends on.
void Localizer::getAnalysisUsage(AnalysisUsage &AU) const {
  // init() queries TargetTransformInfoWrapperPass, so it must be available.
  AU.addRequired<TargetTransformInfoWrapperPass>();
  // Add the usage shared with the SelectionDAG fallback path.
  getSelectionDAGFallbackAnalysisUsage(AU);
  // Let the base class append its own requirements last.
  MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -57,6 +106,106 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
57
106
return InsertMBB == Def.getParent ();
58
107
}
59
108
109
+ bool Localizer::localizeInterBlock (
110
+ MachineFunction &MF, SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs) {
111
+ bool Changed = false ;
112
+ DenseMap<std::pair<MachineBasicBlock *, unsigned >, unsigned > MBBWithLocalDef;
113
+
114
+ // Since the IRTranslator only emits constants into the entry block, and the
115
+ // rest of the GISel pipeline generally emits constants close to their users,
116
+ // we only localize instructions in the entry block here. This might change if
117
+ // we start doing CSE across blocks.
118
+ auto &MBB = MF.front ();
119
+ for (MachineInstr &MI : MBB) {
120
+ if (LocalizedInstrs.count (&MI) || !shouldLocalize (MI))
121
+ continue ;
122
+ LLVM_DEBUG (dbgs () << " Should localize: " << MI);
123
+ assert (MI.getDesc ().getNumDefs () == 1 &&
124
+ " More than one definition not supported yet" );
125
+ unsigned Reg = MI.getOperand (0 ).getReg ();
126
+ // Check if all the users of MI are local.
127
+ // We are going to invalidation the list of use operands, so we
128
+ // can't use range iterator.
129
+ for (auto MOIt = MRI->use_begin (Reg), MOItEnd = MRI->use_end ();
130
+ MOIt != MOItEnd;) {
131
+ MachineOperand &MOUse = *MOIt++;
132
+ // Check if the use is already local.
133
+ MachineBasicBlock *InsertMBB;
134
+ LLVM_DEBUG (MachineInstr &MIUse = *MOUse.getParent ();
135
+ dbgs () << " Checking use: " << MIUse
136
+ << " #Opd: " << MIUse.getOperandNo (&MOUse) << ' \n ' );
137
+ if (isLocalUse (MOUse, MI, InsertMBB))
138
+ continue ;
139
+ LLVM_DEBUG (dbgs () << " Fixing non-local use\n " );
140
+ Changed = true ;
141
+ auto MBBAndReg = std::make_pair (InsertMBB, Reg);
142
+ auto NewVRegIt = MBBWithLocalDef.find (MBBAndReg);
143
+ if (NewVRegIt == MBBWithLocalDef.end ()) {
144
+ // Create the localized instruction.
145
+ MachineInstr *LocalizedMI = MF.CloneMachineInstr (&MI);
146
+ LocalizedInstrs.insert (LocalizedMI);
147
+ MachineInstr &UseMI = *MOUse.getParent ();
148
+ if (MRI->hasOneUse (Reg) && !UseMI.isPHI ())
149
+ InsertMBB->insert (InsertMBB->SkipPHIsAndLabels (UseMI), LocalizedMI);
150
+ else
151
+ InsertMBB->insert (InsertMBB->SkipPHIsAndLabels (InsertMBB->begin ()),
152
+ LocalizedMI);
153
+
154
+ // Set a new register for the definition.
155
+ unsigned NewReg = MRI->createGenericVirtualRegister (MRI->getType (Reg));
156
+ MRI->setRegClassOrRegBank (NewReg, MRI->getRegClassOrRegBank (Reg));
157
+ LocalizedMI->getOperand (0 ).setReg (NewReg);
158
+ NewVRegIt =
159
+ MBBWithLocalDef.insert (std::make_pair (MBBAndReg, NewReg)).first ;
160
+ LLVM_DEBUG (dbgs () << " Inserted: " << *LocalizedMI);
161
+ }
162
+ LLVM_DEBUG (dbgs () << " Update use with: " << printReg (NewVRegIt->second )
163
+ << ' \n ' );
164
+ // Update the user reg.
165
+ MOUse.setReg (NewVRegIt->second );
166
+ }
167
+ }
168
+ return Changed;
169
+ }
170
+
171
+ bool Localizer::localizeIntraBlock (
172
+ SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs) {
173
+ bool Changed = false ;
174
+
175
+ // For each already-localized instruction which has multiple users, then we
176
+ // scan the block top down from the current position until we hit one of them.
177
+
178
+ // FIXME: Consider doing inst duplication if live ranges are very long due to
179
+ // many users, but this case may be better served by regalloc improvements.
180
+
181
+ for (MachineInstr *MI : LocalizedInstrs) {
182
+ unsigned Reg = MI->getOperand (0 ).getReg ();
183
+ MachineBasicBlock &MBB = *MI->getParent ();
184
+ // If the instruction has a single use, we would have already moved it right
185
+ // before its user in localizeInterBlock().
186
+ if (MRI->hasOneUse (Reg))
187
+ continue ;
188
+
189
+ // All of the user MIs of this reg.
190
+ SmallPtrSet<MachineInstr *, 32 > Users;
191
+ for (MachineInstr &UseMI : MRI->use_nodbg_instructions (Reg))
192
+ Users.insert (&UseMI);
193
+
194
+ MachineBasicBlock::iterator II (MI);
195
+ ++II;
196
+ while (II != MBB.end () && !Users.count (&*II))
197
+ ++II;
198
+
199
+ LLVM_DEBUG (dbgs () << " Intra-block: moving " << *MI << " before " << *&*II
200
+ << " \n " );
201
+ assert (II != MBB.end () && " Didn't find the user in the MBB" );
202
+ MI->removeFromParent ();
203
+ MBB.insert (II, MI);
204
+ Changed = true ;
205
+ }
206
+ return Changed;
207
+ }
208
+
60
209
bool Localizer::runOnMachineFunction (MachineFunction &MF) {
61
210
// If the ISel pipeline failed, do not bother running that pass.
62
211
if (MF.getProperties ().hasProperty (
@@ -67,62 +216,10 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {
67
216
68
217
init (MF);
69
218
70
- bool Changed = false ;
71
- // Keep track of the instructions we localized.
72
- // We won't need to process them if we see them later in the CFG.
73
- SmallPtrSet<MachineInstr *, 16 > LocalizedInstrs;
74
- DenseMap<std::pair<MachineBasicBlock *, unsigned >, unsigned > MBBWithLocalDef;
75
- // TODO: Do bottom up traversal.
76
- for (MachineBasicBlock &MBB : MF) {
77
- for (MachineInstr &MI : MBB) {
78
- if (LocalizedInstrs.count (&MI) || !shouldLocalize (MI))
79
- continue ;
80
- LLVM_DEBUG (dbgs () << " Should localize: " << MI);
81
- assert (MI.getDesc ().getNumDefs () == 1 &&
82
- " More than one definition not supported yet" );
83
- unsigned Reg = MI.getOperand (0 ).getReg ();
84
- // Check if all the users of MI are local.
85
- // We are going to invalidation the list of use operands, so we
86
- // can't use range iterator.
87
- for (auto MOIt = MRI->use_begin (Reg), MOItEnd = MRI->use_end ();
88
- MOIt != MOItEnd;) {
89
- MachineOperand &MOUse = *MOIt++;
90
- // Check if the use is already local.
91
- MachineBasicBlock *InsertMBB;
92
- LLVM_DEBUG (MachineInstr &MIUse = *MOUse.getParent ();
93
- dbgs () << " Checking use: " << MIUse
94
- << " #Opd: " << MIUse.getOperandNo (&MOUse) << ' \n ' );
95
- if (isLocalUse (MOUse, MI, InsertMBB))
96
- continue ;
97
- LLVM_DEBUG (dbgs () << " Fixing non-local use\n " );
98
- Changed = true ;
99
- auto MBBAndReg = std::make_pair (InsertMBB, Reg);
100
- auto NewVRegIt = MBBWithLocalDef.find (MBBAndReg);
101
- if (NewVRegIt == MBBWithLocalDef.end ()) {
102
- // Create the localized instruction.
103
- MachineInstr *LocalizedMI = MF.CloneMachineInstr (&MI);
104
- LocalizedInstrs.insert (LocalizedMI);
105
- // Don't try to be smart for the insertion point.
106
- // There is no guarantee that the first seen use is the first
107
- // use in the block.
108
- InsertMBB->insert (InsertMBB->SkipPHIsAndLabels (InsertMBB->begin ()),
109
- LocalizedMI);
219
+ // Keep track of the instructions we localized. We'll do a second pass of
220
+ // intra-block localization to further reduce live ranges.
221
+ SmallPtrSet<MachineInstr *, 32 > LocalizedInstrs;
110
222
111
- // Set a new register for the definition.
112
- unsigned NewReg =
113
- MRI->createGenericVirtualRegister (MRI->getType (Reg));
114
- MRI->setRegClassOrRegBank (NewReg, MRI->getRegClassOrRegBank (Reg));
115
- LocalizedMI->getOperand (0 ).setReg (NewReg);
116
- NewVRegIt =
117
- MBBWithLocalDef.insert (std::make_pair (MBBAndReg, NewReg)).first ;
118
- LLVM_DEBUG (dbgs () << " Inserted: " << *LocalizedMI);
119
- }
120
- LLVM_DEBUG (dbgs () << " Update use with: " << printReg (NewVRegIt->second )
121
- << ' \n ' );
122
- // Update the user reg.
123
- MOUse.setReg (NewVRegIt->second );
124
- }
125
- }
126
- }
127
- return Changed;
223
+ bool Changed = localizeInterBlock (MF, LocalizedInstrs);
224
+ return Changed |= localizeIntraBlock (LocalizedInstrs);
128
225
}
0 commit comments