-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathlift.go
599 lines (546 loc) · 16.3 KB
/
lift.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa2
// This file defines the lifting pass which tries to "lift" Alloc
// cells (new/local variables) into SSA registers, replacing loads
// with the dominating stored value, eliminating loads and stores, and
// inserting φ-nodes as needed.
// Cited papers and resources:
//
// Ron Cytron et al. 1991. Efficiently computing SSA form...
// http://doi.acm.org/10.1145/115372.115320
//
// Cooper, Harvey, Kennedy. 2001. A Simple, Fast Dominance Algorithm.
// Software Practice and Experience 2001, 4:1-10.
// http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
//
// Daniel Berlin, llvmdev mailing list, 2012.
// http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html
// (Be sure to expand the whole thread.)
// TODO(adonovan): opt: there are many optimizations worth evaluating, and
// the conventional wisdom for SSA construction is that a simple
// algorithm well engineered often beats those of better asymptotic
// complexity on all but the most egregious inputs.
//
// Danny Berlin suggests that the Cooper et al. algorithm for
// computing the dominance frontier is superior to Cytron et al.
// Furthermore he recommends that rather than computing the DF for the
// whole function then renaming all alloc cells, it may be cheaper to
// compute the DF for each alloc cell separately and throw it away.
//
// Consider exploiting liveness information to avoid creating dead
// φ-nodes which we then immediately remove.
//
// Integrate lifting with scalar replacement of aggregates (SRA) since
// the two are synergistic.
//
// Also see many other "TODO: opt" suggestions in the code.
import (
"fmt"
"go/token"
"math/big"
"os"
"github.com/rocky/go-types"
)
// If true, perform sanity checking and show diagnostic information at
// each step of lifting. Very verbose.
//
// In this file the flag guards the diagnostic messages that lift,
// liftAlloc and rename write to os.Stderr.
const debugLifting = false
// domFrontier records, for every block, the blocks that make up its
// dominance frontier. The outer slice plays the role of a map indexed
// by Block.Index; each inner slice plays the role of a set, and may
// hold the same block more than once.
//
// TODO(adonovan): opt: measure impact of dups; consider a packed bit
// representation, e.g. big.Int, and bitwise parallel operations for
// the union step in the Children loop.
//
// The methods below modify the elements of the outer slice but never
// its length, which is why value receivers are sufficient.
type domFrontier [][]*BasicBlock

// add appends v to the frontier recorded for u. No de-duplication is
// performed.
func (df domFrontier) add(u, v *BasicBlock) {
	df[u.Index] = append(df[u.Index], v)
}
// build fills in df for every block of the dominator (sub)tree rooted
// at u, following the Cytron et al. algorithm.
//
// TODO(adonovan): opt: consider Berlin approach, computing pruned SSA
// by pruning the entire IDF computation, rather than merely pruning
// the DF -> IDF step.
func (df domFrontier) build(u *BasicBlock) {
	kids := u.dom.Children

	// Visit the dominator tree in postorder: the frontiers of all
	// children are complete before u's own frontier is computed.
	for i := range kids {
		df.build(kids[i])
	}

	// A CFG successor of u joins u's frontier unless u is its
	// immediate dominator.
	for _, succ := range u.Succs {
		if succ.dom.idom == u {
			continue
		}
		df.add(u, succ)
	}

	// A block in the frontier of a dominator-tree child of u joins
	// u's frontier too, again unless u is its immediate dominator.
	for _, kid := range kids {
		// TODO(adonovan): opt: use word-parallel bitwise union.
		for _, cand := range df[kid.Index] {
			if cand.dom.idom == u {
				continue
			}
			df.add(u, cand)
		}
	}
}
// buildDomFrontier returns the dominance frontier of fn, with one
// entry per block of fn.Blocks. The frontier is built from the entry
// block fn.Blocks[0] and, when fn.Recover is non-nil, from that block
// as well.
func buildDomFrontier(fn *Function) domFrontier {
	frontier := make(domFrontier, len(fn.Blocks))
	frontier.build(fn.Blocks[0])
	if rec := fn.Recover; rec != nil {
		frontier.build(rec)
	}
	return frontier
}
// removeInstr deletes every occurrence of instr from refs, compacting
// the survivors in place (order preserved) and returning the
// shortened slice. The vacated tail of the original slice is set to
// nil.
func removeInstr(refs []Instruction, instr Instruction) []Instruction {
	kept := refs[:0] // shares refs' backing array
	for _, r := range refs {
		if r != instr {
			kept = append(kept, r)
		}
	}
	// Clear the slots that are no longer in use, to aid GC.
	for k := len(kept); k < len(refs); k++ {
		refs[k] = nil
	}
	return kept
}
// lift attempts to replace local and new Allocs accessed only with
// load/store by SSA registers, inserting φ-nodes where necessary.
// The result is a program in classical pruned SSA form.
//
// Preconditions:
// - fn has no dead blocks (blockopt has run).
// - Def/use info (Operands and Referrers) is up-to-date.
// - The dominator tree is up-to-date.
//
// Side effects visible in this function: each block's Instrs slice
// may be replaced, each block's gaps and rundefers counters are
// reset, every Alloc's index field is assigned, and lifted Allocs are
// removed from fn.Locals.
func lift(fn *Function) {
	// TODO(adonovan): opt: lots of little optimizations may be
	// worthwhile here, especially if they cause us to avoid
	// buildDomFrontier.  For example:
	//
	// - Alloc never loaded?  Eliminate.
	// - Alloc never stored?  Replace all loads with a zero constant.
	// - Alloc stored once?  Replace loads with dominating store;
	//   don't forget that an Alloc is itself an effective store
	//   of zero.
	// - Alloc used only within a single block?
	//   Use degenerate algorithm avoiding φ-nodes.
	// - Consider synergy with scalar replacement of aggregates (SRA).
	//   e.g. *(&x.f) where x is an Alloc.
	//   Perhaps we'd get better results if we generated this as x.f
	//   i.e. Field(x, .f) instead of Load(FieldIndex(x, .f)).
	//   Unclear.
	//
	// But we will start with the simplest correct code.
	df := buildDomFrontier(fn)

	if debugLifting {
		title := false
		for i, blocks := range df {
			if blocks != nil {
				if !title {
					fmt.Fprintf(os.Stderr, "Dominance frontier of %s:\n", fn)
					title = true
				}
				fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i], blocks)
			}
		}
	}

	newPhis := make(newPhiMap)

	// During this pass we will replace some BasicBlock.Instrs
	// (allocs, loads and stores) with nil, keeping a count in
	// BasicBlock.gaps.  At the end we will reset Instrs to the
	// concatenation of all non-dead newPhis and non-nil Instrs
	// for the block, reusing the original array if space permits.

	// While we're here, we also eliminate 'rundefers'
	// instructions in functions that contain no 'defer'
	// instructions.
	usesDefer := false

	// Determine which allocs we can lift and number them densely.
	// The renaming phase uses this numbering for compact maps.
	// Allocs that cannot be lifted get index -1.
	numAllocs := 0
	for _, b := range fn.Blocks {
		b.gaps = 0
		b.rundefers = 0
		for _, instr := range b.Instrs {
			switch instr := instr.(type) {
			case *Alloc:
				index := -1
				if liftAlloc(df, instr, newPhis) {
					index = numAllocs
					numAllocs++
				}
				instr.index = index
			case *Defer:
				usesDefer = true
			case *RunDefers:
				b.rundefers++
			}
		}
	}

	// renaming maps an alloc (keyed by index) to its replacement
	// value.  Initially the renaming contains nil, signifying the
	// zero constant of the appropriate type; we construct the
	// Const lazily at most once on each path through the domtree.
	// TODO(adonovan): opt: cache per-function not per subtree.
	renaming := make([]Value, numAllocs)

	// Renaming.
	rename(fn.Blocks[0], renaming, newPhis)

	// Eliminate dead new phis, then prepend the live ones to each block.
	for _, b := range fn.Blocks {

		// Compress the newPhis slice to eliminate unused phis.
		// TODO(adonovan): opt: compute liveness to avoid
		// placing phis in blocks for which the alloc cell is
		// not live.
		nps := newPhis[b]
		j := 0
		for _, np := range nps {
			if !phiIsLive(np.phi) {
				// discard it, first removing it from referrers
				for _, newval := range np.phi.Edges {
					if refs := newval.Referrers(); refs != nil {
						*refs = removeInstr(*refs, np.phi)
					}
				}
				continue
			}
			nps[j] = np
			j++
		}
		nps = nps[:j]

		// RunDefers instructions are dropped only when the
		// function contains no Defer at all.
		rundefersToKill := b.rundefers
		if usesDefer {
			rundefersToKill = 0
		}

		if j+b.gaps+rundefersToKill == 0 {
			continue // fast path: no new phis or gaps
		}

		// Compact nps + non-nil Instrs into a new slice.
		// TODO(adonovan): opt: compact in situ if there is
		// sufficient space or slack in the slice.
		dst := make([]Instruction, len(b.Instrs)+j-b.gaps-rundefersToKill)
		// The live φ-nodes occupy dst[0:len(nps)]; j == len(nps)
		// here, so the surviving instructions follow from dst[j].
		for i, np := range nps {
			dst[i] = np.phi
		}
		for _, instr := range b.Instrs {
			if instr == nil {
				continue
			}
			if !usesDefer {
				if _, ok := instr.(*RunDefers); ok {
					continue
				}
			}
			dst[j] = instr
			j++
		}
		b.Instrs = dst
	}

	// Remove any fn.Locals that were lifted.
	j := 0
	for _, l := range fn.Locals {
		if l.index < 0 {
			fn.Locals[j] = l
			j++
		}
	}
	// Nil out fn.Locals[j:] to aid GC.
	for i := j; i < len(fn.Locals); i++ {
		fn.Locals[i] = nil
	}
	fn.Locals = fn.Locals[:j]
}
// phiIsLive reports whether phi has at least one referrer that is
// neither phi itself nor a DebugRef instruction.
func phiIsLive(phi *Phi) bool {
	for _, user := range *phi.Referrers() {
		// Self-references and debug references don't count as uses.
		_, isDebug := user.(*DebugRef)
		if user != phi && !isDebug {
			return true
		}
	}
	return false
}
// blockSet is a set of basic blocks, each represented by the bit at
// position Block.Index of the embedded big.Int.
type blockSet struct{ big.Int } // (inherit methods from Int)

// add inserts b into the set and reports whether b was not already
// a member, i.e. whether the set changed.
func (s *blockSet) add(b *BasicBlock) bool {
	idx := b.Index
	fresh := s.Bit(idx) == 0
	if fresh {
		s.SetBit(&s.Int, idx, 1)
	}
	return fresh
}

// take removes one element from s and returns its index, or returns
// -1 if s is empty. As implemented, the element removed is the one
// with the smallest index.
func (s *blockSet) take() int {
	for idx, limit := 0, s.BitLen(); idx < limit; idx++ {
		if s.Bit(idx) != 1 {
			continue
		}
		s.SetBit(&s.Int, idx, 0)
		return idx
	}
	return -1
}
// newPhi is a pair of a newly introduced φ-node and the lifted Alloc
// it replaces.
type newPhi struct {
	// phi is the φ-node created for alloc.
	phi *Phi
	// alloc is the Alloc cell whose value phi stands for.
	alloc *Alloc
}

// newPhiMap records for each basic block, the set of newPhis that
// must be prepended to the block.
// liftAlloc appends entries; lift later drops those whose φ-node is
// not live and prepends the rest to the block's instructions.
type newPhiMap map[*BasicBlock][]newPhi
// liftAlloc determines whether alloc can be lifted into registers,
// and if so, it populates newPhis with all the φ-nodes it may require
// and returns true.
//
// An alloc is rejected (false is returned, newPhis untouched) when:
//   - its element type is an array or struct;
//   - it is a named result of a function that has a Recover block;
//   - any referrer is something other than a Store to the cell, a
//     load (UnOp with token.MUL) of the cell, or a DebugRef.
//
// It panics if a Store or load referrer does not actually mention
// alloc as its address operand, since that means the referrer lists
// are inconsistent.
func liftAlloc(df domFrontier, alloc *Alloc, newPhis newPhiMap) bool {
	// Don't lift aggregates into registers, because we don't have
	// a way to express their zero-constants.
	switch deref(alloc.Type()).Underlying().(type) {
	case *types.Array, *types.Struct:
		return false
	}

	// Don't lift named return values in functions that defer
	// calls that may recover from panic.
	if fn := alloc.Parent(); fn.Recover != nil {
		for _, nr := range fn.namedResults {
			if nr == alloc {
				return false
			}
		}
	}

	// Compute defblocks, the set of blocks containing a
	// definition of the alloc cell.
	var defblocks blockSet
	for _, instr := range *alloc.Referrers() {
		// Bail out if we discover the alloc is not liftable;
		// the only operations permitted to use the alloc are
		// loads/stores into the cell, and DebugRef.
		switch instr := instr.(type) {
		case *Store:
			if instr.Val == alloc {
				return false // address used as value
			}
			if instr.Addr != alloc {
				panic("Alloc.Referrers is inconsistent")
			}
			defblocks.add(instr.Block())
		case *UnOp:
			if instr.Op != token.MUL {
				return false // not a load
			}
			if instr.X != alloc {
				panic("Alloc.Referrers is inconsistent")
			}
		case *DebugRef:
			// ok
		default:
			return false // some other instruction
		}
	}
	// The Alloc itself counts as a (zero) definition of the cell.
	defblocks.add(alloc.Block())

	if debugLifting {
		fmt.Fprintln(os.Stderr, "\tlifting ", alloc, alloc.Name())
	}

	fn := alloc.Parent()

	// Φ-insertion.
	//
	// What follows is the body of the main loop of the insert-φ
	// function described by Cytron et al, but instead of using
	// counter tricks, we just reset the 'hasAlready' and 'work'
	// sets each iteration.  These are bitmaps so it's pretty cheap.
	//
	// TODO(adonovan): opt: recycle slice storage for W,
	// hasAlready, defBlocks across liftAlloc calls.
	var hasAlready blockSet

	// Initialize W and work to defblocks.
	//
	// Both are populated with Set rather than by struct assignment:
	// assigning a big.Int copies only its header, leaving the two
	// values sharing backing storage, which math/big does not support.
	var work blockSet // blocks seen
	var W blockSet    // blocks to do
	work.Set(&defblocks.Int)
	W.Set(&defblocks.Int)

	// Traverse iterated dominance frontier, inserting φ-nodes.
	for i := W.take(); i != -1; i = W.take() {
		u := fn.Blocks[i]
		for _, v := range df[u.Index] {
			if hasAlready.add(v) {
				// Create φ-node.
				// It will be prepended to v.Instrs later, if needed.
				phi := &Phi{
					Edges:   make([]Value, len(v.Preds)),
					Comment: alloc.Comment,
				}
				phi.pos = alloc.Pos()
				phi.setType(deref(alloc.Type()))
				phi.block = v
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tplace %s = %s at block %s\n", phi.Name(), phi, v)
				}
				newPhis[v] = append(newPhis[v], newPhi{phi, alloc})

				// A φ-node is itself a definition, so v's
				// frontier must be visited too (once).
				if work.add(v) {
					W.add(v)
				}
			}
		}
	}

	return true
}
// replaceAll rewrites every intraprocedural use of x so that it uses
// y instead, and keeps the referrer lists in step: each instruction
// that referred to x is appended to y's referrers (when y has a
// referrer list), and x's referrer list is cleared.
// Precondition: x.Referrers() != nil, i.e. x must be local to some function.
func replaceAll(x, y Value) {
	xRefs, yRefs := x.Referrers(), y.Referrers()

	var operands []*Value // scratch buffer, reused for every instruction
	for _, user := range *xRefs {
		operands = user.Operands(operands[:0])
		for _, slot := range operands {
			if cur := *slot; cur != nil && cur == x {
				*slot = y
			}
		}
		if yRefs != nil {
			*yRefs = append(*yRefs, user) // dups ok
		}
	}

	*xRefs = nil // x is now unreferenced
}
// renamed returns the value currently standing in for alloc in
// renaming. A nil entry denotes the implicit zero initialization; in
// that case the zero constant of the cell's element type is created,
// cached in renaming, and returned.
func renamed(renaming []Value, alloc *Alloc) Value {
	if cur := renaming[alloc.index]; cur != nil {
		return cur
	}
	var zero Value = zeroConst(deref(alloc.Type()))
	renaming[alloc.index] = zero
	return zero
}
// rename implements the (Cytron et al) SSA renaming algorithm, a
// preorder traversal of the dominator tree replacing all loads of
// Alloc cells with the value stored to that cell by the dominating
// store instruction.  For lifting, we need only consider loads,
// stores and φ-nodes.
//
// renaming is a map from *Alloc (keyed by index number) to its
// dominating stored value; newPhis[x] is the set of new φ-nodes to be
// prepended to block x.
//
// Instructions that become redundant are replaced by nil in u.Instrs
// and counted in u.gaps; the caller compacts the slice afterwards.
func rename(u *BasicBlock, renaming []Value, newPhis newPhiMap) {
	// On entry to u, each φ-node placed here is the current name
	// of its associated Alloc.
	for _, entry := range newPhis[u] {
		renaming[entry.alloc.index] = entry.phi
	}

	// Rewrite the accesses to lifted cells, in instruction order.
	for pos, ins := range u.Instrs {
		switch ins := ins.(type) {
		case *Alloc:
			if ins.index < 0 {
				continue // not lifted
			}
			// The Alloc acts as a store of zero to its cell:
			// dominated loads see the zero value.
			renaming[ins.index] = nil
			if debugLifting {
				fmt.Fprintf(os.Stderr, "\tkill alloc %s\n", ins)
			}
			// Delete the Alloc.
			u.Instrs[pos] = nil
			u.gaps++

		case *Store:
			cell, ok := ins.Addr.(*Alloc)
			if !ok || cell.index < 0 {
				continue // not a store to a lifted cell
			}
			// Dominated loads now see the stored value.
			renaming[cell.index] = ins.Val
			if debugLifting {
				fmt.Fprintf(os.Stderr, "\tkill store %s; new value: %s\n",
					ins, ins.Val.Name())
			}
			// The store no longer refers to the stored value.
			if refs := ins.Val.Referrers(); refs != nil {
				*refs = removeInstr(*refs, ins)
			}
			// Delete the Store.
			u.Instrs[pos] = nil
			u.gaps++

		case *UnOp:
			if ins.Op != token.MUL {
				continue // not a load
			}
			cell, ok := ins.X.(*Alloc)
			if !ok || cell.index < 0 {
				continue // not a load of a lifted cell
			}
			val := renamed(renaming, cell)
			if debugLifting {
				fmt.Fprintf(os.Stderr, "\tupdate load %s = %s with %s\n",
					ins.Name(), ins, val.Name())
			}
			// Every use of the loaded value becomes a use of
			// the dominating stored value.
			replaceAll(ins, val)
			// Delete the Load.
			u.Instrs[pos] = nil
			u.gaps++

		case *DebugRef:
			cell, ok := ins.X.(*Alloc)
			if !ok || cell.index < 0 {
				continue // not a reference to a lifted cell
			}
			if !ins.IsAddr {
				// A source expression denotes the address
				// of an Alloc that was optimized away.
				ins.X = nil
				// Delete the DebugRef.
				u.Instrs[pos] = nil
				u.gaps++
				continue
			}
			// Point the DebugRef at the cell's current value
			// and register it among that value's referrers.
			ins.X = renamed(renaming, cell)
			ins.IsAddr = false
			if refs := ins.X.Referrers(); refs != nil {
				*refs = append(*refs, ins)
			}
		}
	}

	// For each φ-node in a CFG successor, rename the edge.
	for _, succ := range u.Succs {
		pending := newPhis[succ]
		if len(pending) == 0 {
			continue
		}
		edge := succ.predIndex(u)
		for _, entry := range pending {
			val := renamed(renaming, entry.alloc)
			if debugLifting {
				fmt.Fprintf(os.Stderr, "\tsetphi %s edge %s -> %s (#%d) (alloc=%s) := %s\n",
					entry.phi.Name(), u, succ, edge, entry.alloc.Name(), val.Name())
			}
			entry.phi.Edges[edge] = val
			if refs := val.Referrers(); refs != nil {
				*refs = append(*refs, entry.phi)
			}
		}
	}

	// Continue depth-first recursion over domtree, pushing a
	// fresh copy of the renaming map for each subtree.
	for _, child := range u.dom.Children {
		// TODO(adonovan): opt: avoid copy on final iteration; use destructive update.
		snapshot := make([]Value, len(renaming))
		copy(snapshot, renaming)
		rename(child, snapshot, newPhis)
	}
}