Add a basic alias analysis with redundant-load elim and store-to-load forwarding opts. #4163

Merged · 7 commits · May 20, 2022

Changes from 1 commit
162 changes: 77 additions & 85 deletions cranelift/codegen/src/alias_analysis.rs
@@ -270,83 +270,74 @@ impl<'a> AliasAnalysis<'a> {
/// (e.g. in cases of double-indirection with two separate chains
/// of loads).
pub fn compute_and_update_aliases(&mut self) {
let first_block = self
.func
.layout
.blocks()
.next()
.expect("Must have at least one block");
let mut pos = FuncCursor::new(self.func).at_top(first_block);
let mut state = LastStores::default();
let mut last_block = None;
while let Some(inst) = pos.next_inst() {
if let Some(block) = pos.func.layout.inst_block(inst) {
if Some(block) != last_block {
last_block = Some(block);
state = self
.block_input
.get(&block)
.cloned()
.unwrap_or_else(|| LastStores::default());
}
}
log::trace!(
"alias analysis: scanning at inst{} with state {:?} ({:?})",
inst.index(),
state,
pos.func.dfg[inst],
);
let mut pos = FuncCursor::new(self.func);

while let Some(block) = pos.next_block() {
let mut state = self
.block_input
.get(&block)
.cloned()
.unwrap_or_else(|| LastStores::default());

if let Some((address, offset, ty)) = inst_addr_offset_type(pos.func, inst) {
let address = pos.func.dfg.resolve_aliases(address);
let opcode = pos.func.dfg[inst].opcode();
while let Some(inst) = pos.next_inst() {
log::trace!(
"alias analysis: scanning at inst{} with state {:?} ({:?})",
inst.index(),
state,
pos.func.dfg[inst],
);

if opcode.can_store() {
let store_data = inst_store_data(pos.func, inst).unwrap();
let store_data = pos.func.dfg.resolve_aliases(store_data);
let mem_loc = MemoryLoc {
last_store: inst.into(),
address,
offset,
ty,
extending_opcode: get_ext_opcode(opcode),
};
log::trace!(
"alias analysis: at inst{}: store with data v{} at loc {:?}",
inst.index(),
store_data.index(),
mem_loc
);
self.mem_values.insert(mem_loc, (inst, store_data));
} else if opcode.can_load() {
let last_store = state.get_last_store(pos.func, inst);
let load_result = pos.func.dfg.inst_results(inst)[0];
let mem_loc = MemoryLoc {
last_store,
address,
offset,
ty,
extending_opcode: get_ext_opcode(opcode),
};
log::trace!(
"alias analysis: at inst{}: load with last_store inst{} at loc {:?}",
inst.index(),
last_store.map(|inst| inst.index()).unwrap_or(usize::MAX),
mem_loc
);
if let Some((address, offset, ty)) = inst_addr_offset_type(pos.func, inst) {
let address = pos.func.dfg.resolve_aliases(address);
let opcode = pos.func.dfg[inst].opcode();

// Is there a Value already known to be stored
// at this specific memory location? If so,
// we can alias the load result to this
// already-known Value.
//
// Check if the definition dominates this
// location; it might not, if it comes from a
// load (stores will always dominate though if
// their `last_store` survives through
// meet-points to this use-site).
let aliased =
if let Some((def_inst, value)) = self.mem_values.get(&mem_loc).cloned() {
if opcode.can_store() {
let store_data = inst_store_data(pos.func, inst).unwrap();
let store_data = pos.func.dfg.resolve_aliases(store_data);
let mem_loc = MemoryLoc {
last_store: inst.into(),
address,
offset,
ty,
extending_opcode: get_ext_opcode(opcode),
};
log::trace!(
"alias analysis: at inst{}: store with data v{} at loc {:?}",
inst.index(),
store_data.index(),
mem_loc
);
self.mem_values.insert(mem_loc, (inst, store_data));
} else if opcode.can_load() {
let last_store = state.get_last_store(pos.func, inst);
let load_result = pos.func.dfg.inst_results(inst)[0];
let mem_loc = MemoryLoc {
last_store,
address,
offset,
ty,
extending_opcode: get_ext_opcode(opcode),
};
log::trace!(
"alias analysis: at inst{}: load with last_store inst{} at loc {:?}",
inst.index(),
last_store.map(|inst| inst.index()).unwrap_or(usize::MAX),
mem_loc
);

// Is there a Value already known to be stored
// at this specific memory location? If so,
// we can alias the load result to this
// already-known Value.
//
// Check if the definition dominates this
// location; it might not, if it comes from a
// load (stores will always dominate though if
// their `last_store` survives through
// meet-points to this use-site).
let aliased = if let Some((def_inst, value)) =
self.mem_values.get(&mem_loc).cloned()
{
log::trace!(
" -> sees known value v{} from inst{}",
value.index(),
@@ -370,20 +361,21 @@ impl<'a> AliasAnalysis<'a> {
false
};

// Otherwise, we can keep *this* load around
// as a new equivalent value.
if !aliased {
log::trace!(
" -> inserting load result v{} at loc {:?}",
load_result.index(),
mem_loc
);
self.mem_values.insert(mem_loc, (inst, load_result));
// Otherwise, we can keep *this* load around
// as a new equivalent value.
if !aliased {
log::trace!(
" -> inserting load result v{} at loc {:?}",
load_result.index(),
mem_loc
);
self.mem_values.insert(mem_loc, (inst, load_result));
}
}
}
}

state.update(pos.func, inst);
state.update(pos.func, inst);
}
}
}
}
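
The mechanism is easiest to see outside Cranelift's types. Below is a self-contained toy model (illustrative only; the names and structure are mine, not the pass's API) of the key idea: remembered memory values are keyed by the last store as well as the address and offset, so a value is only reused while its defining store is still the most recent one. The real pass keys on (last_store, address, offset, type, extending opcode), as the MemoryLoc construction above shows, and also records load results under the same key so that later identical loads are eliminated as redundant.

use std::collections::HashMap;

// Each toy instruction either stores a value to (addr, off) or loads from it.
#[derive(Clone, Copy)]
enum Inst {
    Store { addr: u32, off: u32, val: u32 },
    Load { addr: u32, off: u32 },
}

// For each instruction, return the forwarded value if a load could be
// resolved from a remembered store.
fn forward_loads(insts: &[Inst]) -> Vec<Option<u32>> {
    // Index of the most recent store. A remembered value is valid only
    // while the store that produced it is still the last one, i.e.
    // nothing has potentially clobbered memory since.
    let mut last_store: Option<usize> = None;
    // Known memory values, keyed by (last_store, addr, off).
    let mut mem_values: HashMap<(Option<usize>, u32, u32), u32> = HashMap::new();
    let mut out = Vec::new();
    for (i, inst) in insts.iter().enumerate() {
        match *inst {
            Inst::Store { addr, off, val } => {
                last_store = Some(i);
                // The store defines the value at this location, keyed
                // by itself as the last store.
                mem_values.insert((last_store, addr, off), val);
                out.push(None);
            }
            Inst::Load { addr, off } => {
                out.push(mem_values.get(&(last_store, addr, off)).copied());
            }
        }
    }
    out
}

fn main() {
    let prog = [
        Inst::Store { addr: 0, off: 8, val: 42 },
        Inst::Load { addr: 0, off: 8 },          // forwarded: Some(42)
        Inst::Store { addr: 0, off: 8, val: 7 }, // becomes the new last store
        Inst::Load { addr: 0, off: 8 },          // forwarded: Some(7)
    ];
    println!("{:?}", forward_loads(&prog)); // [None, Some(42), None, Some(7)]
}
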
22 changes: 22 additions & 0 deletions cranelift/filetests/filetests/alias/categories.clif
@@ -0,0 +1,22 @@
test alias-analysis
set opt_level=speed
target aarch64

;; Check that aliasing properly respects the last store in each
;; "category" separately.

function %f0(i64, i64) -> i32, i32 {

block0(v0: i64, v1: i64):
v2 = iconst.i32 42
v3 = iconst.i32 43
store.i32 heap v2, v0+8
store.i32 table v3, v1+8

v4 = load.i32 heap v0+8
v5 = load.i32 table v1+8
; check: v4 -> v2
; check: v5 -> v3

return v4, v5
}
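
What "category" means here: memory operations carry class flags (heap, table, and so on), and distinct classes are assumed never to alias, so the analysis can track a separate last store per class. A minimal sketch of that idea follows, with the caveat that the field set and names are my assumption; the real state is the LastStores type in cranelift/codegen/src/alias_analysis.rs, which is also more conservative (for example, unflagged stores and calls are treated as clobbering every class).

type Inst = usize; // stand-in for Cranelift's instruction reference

#[derive(Clone, Copy)]
enum MemClass { Heap, Table, Other }

#[derive(Clone, Copy, Default)]
struct LastStores {
    heap: Option<Inst>,
    table: Option<Inst>,
    other: Option<Inst>,
}

impl LastStores {
    // A store advances only the slot for its own class, so the `table`
    // store in the test above does not disturb the remembered `heap`
    // value, and both loads can be forwarded.
    fn store(&mut self, class: MemClass, at: Inst) {
        match class {
            MemClass::Heap => self.heap = Some(at),
            MemClass::Table => self.table = Some(at),
            MemClass::Other => self.other = Some(at),
        }
    }

    fn last_store(&self, class: MemClass) -> Option<Inst> {
        match class {
            MemClass::Heap => self.heap,
            MemClass::Table => self.table,
            MemClass::Other => self.other,
        }
    }
}
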
29 changes: 29 additions & 0 deletions cranelift/filetests/filetests/alias/multiple-blocks.clif
@@ -0,0 +1,29 @@
test alias-analysis
set opt_level=speed
target aarch64

;; Check RLE across basic blocks.

function %f0(i64 vmctx, i32) -> i32 {
gv0 = vmctx
gv1 = load.i64 notrap readonly aligned gv0+8
heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32


block0(v0: i64, v1: i32):
v2 = heap_addr.i64 heap0, v1, 0
v3 = load.i32 v2+8
brz v2, block1
jump block2

block1:
v4 = load.i32 v2+8
; check: v4 -> v3
jump block3(v4)

block2:
jump block3(v3)

block3(v5: i32):
return v5
}
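
This works because block1 has block0 as its only predecessor, so its input state (the block_input map in the code above) is exactly block0's output, and the load's (last_store, address, offset, type) key matches v3's. At merges with several predecessors, states must be combined; a plausible sketch of the meet rule, inferred from the behavior the tests check rather than quoted from the pass:

type Inst = usize; // stand-in for Cranelift's instruction reference

// If all predecessors agree on the last store, that knowledge survives
// the merge; if they disagree, fall back to the merge point itself,
// which no remembered entry can match, so it acts as a barrier.
fn meet(a: Option<Inst>, b: Option<Inst>, merge_point: Inst) -> Option<Inst> {
    if a == b { a } else { Some(merge_point) }
}
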
35 changes: 35 additions & 0 deletions cranelift/filetests/filetests/alias/partial-redundancy.clif
@@ -0,0 +1,35 @@
test alias-analysis
set opt_level=speed
target aarch64

;; A test of partial redundancy: we should *not* RLE when an earlier
;; load to the location is only in one predecessor of multiple.

function %f0(i64 vmctx, i32) -> i32, i32 {
gv0 = vmctx
gv1 = load.i64 notrap readonly aligned gv0+8
heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32
fn0 = %g(i64 vmctx)

block0(v0: i64, v1: i32):
brz v1, block1
jump block2

block1:
v2 = heap_addr.i64 heap0, v1, 0
v3 = load.i32 v2+64
jump block3(v3)

block2:
v4 = heap_addr.i64 heap0, v1, 0
v5 = load.i32 v4+128
jump block3(v5)

block3(v6: i32):
v7 = heap_addr.i64 heap0, v1, 0
v8 = load.i32 v7+64
;; load should survive:
; check: v8 = load.i32 v7+64
return v6, v8

}
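
The reason this case must stay untouched is dominance: v3 is available only via block1 and v5 only via block2, so neither definition dominates the load in block3, and rewriting v8 to either would reference a value that may never have been computed on the taken path. Removing the load would require true partial-redundancy elimination (hoisting a load into the other predecessor or adding a block parameter at the merge), which this deliberately basic pass does not attempt; the comment in compute_and_update_aliases above makes the same dominance check explicit.
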
80 changes: 21 additions & 59 deletions cranelift/filetests/filetests/alias/simple-alias.clif
@@ -1,7 +1,11 @@
test compile precise-output
test alias-analysis
set opt_level=speed
target aarch64

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Redundant-load elimination
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

function %f0(i64 vmctx, i32) -> i32, i32, i32, i32 {
gv0 = vmctx
gv1 = load.i64 notrap readonly aligned gv0+8
@@ -11,62 +15,27 @@ function %f0(i64 vmctx, i32) -> i32, i32, i32, i32 {
block0(v0: i64, v1: i32):
v2 = heap_addr.i64 heap0, v1, 0
v3 = load.i32 v2+8
;; This should reuse the load above.
v4 = heap_addr.i64 heap0, v1, 0
v5 = load.i32 v4+8
; check: v5 -> v3

call fn0(v0)

;; The second load is redundant wrt the first, but the call above
;; is a barrier that prevents reusing v3 or v5.
v6 = load.i32 v4+8
v7 = load.i32 v4+8
; check: v7 -> v6

return v3, v5, v6, v7
}

; stp fp, lr, [sp, #-16]!
; mov fp, sp
; str x25, [sp, #-16]!
; stp x23, x24, [sp, #-16]!
; block0:
; ldr x24, [x0, #8]
; add x14, x24, #8
; ldr w7, [x14, w1, UXTW]
; mov x25, x1
; mov x23, x7
; ldr x1, 8 ; b 12 ; data TestCase { length: 1, ascii: [103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0
; blr x1
; add x1, x24, #8
; mov x9, x25
; ldr w3, [x1, w9, UXTW]
; mov x0, x23
; mov x1, x23
; mov x2, x3
; ldp x23, x24, [sp], #16
; ldr x25, [sp], #16
; ldp fp, lr, [sp], #16
; ret

function %f1(i64 vmctx, i32) -> i32, i32 {
gv0 = vmctx
gv1 = load.i64 notrap readonly aligned gv0+8
heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32
fn0 = %g(i64 vmctx)

block0(v0: i64, v1: i32):
v2 = heap_addr.i64 heap0, v1, 0
v3 = load.i32 v2+8
v4 = heap_addr.i64 heap0, v1, 0
v5 = load.i32 v4+8
return v3, v5
}

; stp fp, lr, [sp, #-16]!
; mov fp, sp
; block0:
; ldr x4, [x0, #8]
; add x9, x4, #8
; ldr w1, [x9, w1, UXTW]
; mov x0, x1
; ldp fp, lr, [sp], #16
; ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Store-to-load forwarding
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

function %f2(i64 vmctx, i32) -> i32 {
function %f1(i64 vmctx, i32) -> i32 {
gv0 = vmctx
gv1 = load.i64 notrap readonly aligned gv0+8
heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32
@@ -75,18 +44,11 @@ function %f2(i64 vmctx, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = heap_addr.i64 heap0, v1, 0
store.i32 v1, v2+8

;; This load should pick up the store above.
v3 = heap_addr.i64 heap0, v1, 0
v4 = load.i32 v3+8
; check: v4 -> v1

return v4
}

; stp fp, lr, [sp, #-16]!
; mov fp, sp
; block0:
; ldr x4, [x0, #8]
; add x7, x4, #8
; str w1, [x7, w1, UXTW]
; mov x0, x1
; ldp fp, lr, [sp], #16
; ret
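
A note on this last diff: the file previously ran under test compile precise-output and pinned exact aarch64 machine code, which is why the large blocks of expected-assembly comments are deleted. It now runs the new alias-analysis subtest and states the aliasing facts directly with check: directives, so the expectations no longer break when unrelated codegen changes.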

2 changes: 2 additions & 0 deletions cranelift/filetests/src/lib.rs
@@ -37,6 +37,7 @@ mod runone;
mod runtest_environment;
mod subtest;

mod test_alias_analysis;
mod test_cat;
mod test_compile;
mod test_dce;
@@ -111,6 +112,7 @@ pub fn run_passes(
/// a `.clif` test file.
fn new_subtest(parsed: &TestCommand) -> anyhow::Result<Box<dyn subtest::SubTest>> {
match parsed.command {
"alias-analysis" => test_alias_analysis::subtest(parsed),
"cat" => test_cat::subtest(parsed),
"compile" => test_compile::subtest(parsed),
"dce" => test_dce::subtest(parsed),