Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a basic alias analysis with redundant-load elim and store-to-load fowarding opts. #4163

Merged
merged 7 commits into from
May 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
388 changes: 388 additions & 0 deletions cranelift/codegen/src/alias_analysis.rs

Large diffs are not rendered by default.

17 changes: 17 additions & 0 deletions cranelift/codegen/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
//! contexts concurrently. Typically, you would have one context per compilation thread and only a
//! single ISA instance.

use crate::alias_analysis::AliasAnalysis;
use crate::binemit::CodeInfo;
use crate::dce::do_dce;
use crate::dominator_tree::DominatorTree;
Expand Down Expand Up @@ -162,6 +163,11 @@ impl Context {

self.remove_constant_phis(isa)?;

if opt_level != OptLevel::None {
self.replace_redundant_loads()?;
self.simple_gvn(isa)?;
}

let result = isa.compile_function(&self.func, self.want_disasm)?;
let info = result.code_info();
self.mach_compile_result = Some(result);
Expand Down Expand Up @@ -341,6 +347,17 @@ impl Context {
self.verify_if(fisa)
}

/// Replace all redundant loads with the known values in
/// memory. These are loads whose values were already loaded by
/// other loads earlier, as well as loads whose values were stored
/// by a store instruction to the same instruction (so-called
/// "store-to-load forwarding").
pub fn replace_redundant_loads(&mut self) -> CodegenResult<()> {
let mut analysis = AliasAnalysis::new(&mut self.func, &self.domtree);
analysis.compute_and_update_aliases();
Ok(())
}

/// Harvest candidate left-hand sides for superoptimization with Souper.
#[cfg(feature = "souper-harvest")]
pub fn souper_harvest(
Expand Down
80 changes: 78 additions & 2 deletions cranelift/codegen/src/inst_predicates.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Instruction predicates/properties, shared by various analyses.

use crate::ir::{DataFlowGraph, Function, Inst, InstructionData, Opcode};
use crate::ir::immediates::Offset32;
use crate::ir::instructions::BranchInfo;
use crate::ir::{Block, DataFlowGraph, Function, Inst, InstructionData, Opcode, Type, Value};
use crate::machinst::ty_bits;
use cranelift_entity::EntityRef;

Expand Down Expand Up @@ -78,3 +79,78 @@ pub fn is_constant_64bit(func: &Function, inst: Inst) -> Option<u64> {
_ => None,
}
}

/// Get the address, offset, and access type from the given instruction, if any.
pub fn inst_addr_offset_type(func: &Function, inst: Inst) -> Option<(Value, Offset32, Type)> {
let data = &func.dfg[inst];
match data {
InstructionData::Load { arg, offset, .. } => {
let ty = func.dfg.value_type(func.dfg.inst_results(inst)[0]);
Some((*arg, *offset, ty))
}
InstructionData::LoadNoOffset { arg, .. } => {
let ty = func.dfg.value_type(func.dfg.inst_results(inst)[0]);
Some((*arg, 0.into(), ty))
}
InstructionData::Store { args, offset, .. } => {
let ty = func.dfg.value_type(args[0]);
Some((args[1], *offset, ty))
}
InstructionData::StoreNoOffset { args, .. } => {
let ty = func.dfg.value_type(args[0]);
Some((args[1], 0.into(), ty))
}
_ => None,
}
}

/// Get the store data, if any, from an instruction.
pub fn inst_store_data(func: &Function, inst: Inst) -> Option<Value> {
let data = &func.dfg[inst];
match data {
InstructionData::Store { args, .. } | InstructionData::StoreNoOffset { args, .. } => {
Some(args[0])
}
_ => None,
}
}

/// Determine whether this opcode behaves as a memory fence, i.e.,
/// prohibits any moving of memory accesses across it.
pub fn has_memory_fence_semantics(op: Opcode) -> bool {
match op {
Opcode::AtomicRmw
| Opcode::AtomicCas
| Opcode::AtomicLoad
| Opcode::AtomicStore
| Opcode::Fence => true,
Opcode::Call | Opcode::CallIndirect => true,
_ => false,
}
}

/// Visit all successors of a block with a given visitor closure.
pub(crate) fn visit_block_succs<F: FnMut(Inst, Block)>(f: &Function, block: Block, mut visit: F) {
for inst in f.layout.block_likely_branches(block) {
if f.dfg[inst].opcode().is_branch() {
visit_branch_targets(f, inst, &mut visit);
}
}
}

fn visit_branch_targets<F: FnMut(Inst, Block)>(f: &Function, inst: Inst, visit: &mut F) {
match f.dfg[inst].analyze_branch(&f.dfg.value_lists) {
BranchInfo::NotABranch => {}
BranchInfo::SingleDest(dest, _) => {
visit(inst, dest);
}
BranchInfo::Table(table, maybe_dest) => {
if let Some(dest) = maybe_dest {
visit(inst, dest);
}
for &dest in f.jump_tables[table].as_slice() {
visit(inst, dest);
}
}
}
}
112 changes: 111 additions & 1 deletion cranelift/codegen/src/ir/memflags.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,20 @@ enum FlagBit {
Readonly,
LittleEndian,
BigEndian,
/// Accesses only the "heap" part of abstract state. Used for
/// alias analysis. Mutually exclusive with "table" and "vmctx".
Heap,
/// Accesses only the "table" part of abstract state. Used for
/// alias analysis. Mutually exclusive with "heap" and "vmctx".
Table,
/// Accesses only the "vmctx" part of abstract state. Used for
/// alias analysis. Mutually exclusive with "heap" and "table".
Vmctx,
}

const NAMES: [&str; 5] = ["notrap", "aligned", "readonly", "little", "big"];
const NAMES: [&str; 8] = [
"notrap", "aligned", "readonly", "little", "big", "heap", "table", "vmctx",
];

/// Endianness of a memory access.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
Expand Down Expand Up @@ -110,6 +121,12 @@ impl MemFlags {
assert!(!(self.read(FlagBit::LittleEndian) && self.read(FlagBit::BigEndian)));
}

/// Set endianness of the memory access, returning new flags.
pub fn with_endianness(mut self, endianness: Endianness) -> Self {
self.set_endianness(endianness);
self
}

/// Test if the `notrap` flag is set.
///
/// Normally, trapping is part of the semantics of a load/store operation. If the platform
Expand All @@ -128,6 +145,12 @@ impl MemFlags {
self.set(FlagBit::Notrap)
}

/// Set the `notrap` flag, returning new flags.
pub fn with_notrap(mut self) -> Self {
self.set_notrap();
self
}

/// Test if the `aligned` flag is set.
///
/// By default, Cranelift memory instructions work with any unaligned effective address. If the
Expand All @@ -142,6 +165,12 @@ impl MemFlags {
self.set(FlagBit::Aligned)
}

/// Set the `aligned` flag, returning new flags.
pub fn with_aligned(mut self) -> Self {
self.set_aligned();
self
}

/// Test if the `readonly` flag is set.
///
/// Loads with this flag have no memory dependencies.
Expand All @@ -155,6 +184,87 @@ impl MemFlags {
pub fn set_readonly(&mut self) {
self.set(FlagBit::Readonly)
}

/// Set the `readonly` flag, returning new flags.
pub fn with_readonly(mut self) -> Self {
self.set_readonly();
self
}

/// Test if the `heap` bit is set.
///
/// Loads and stores with this flag accesses the "heap" part of
/// abstract state. This is disjoint from the "table", "vmctx",
/// and "other" parts of abstract state. In concrete terms, this
/// means that behavior is undefined if the same memory is also
/// accessed by another load/store with one of the other
/// alias-analysis bits (`table`, `vmctx`) set, or `heap` not set.
pub fn heap(self) -> bool {
self.read(FlagBit::Heap)
}

/// Set the `heap` bit. See the notes about mutual exclusion with
/// other bits in `heap()`.
pub fn set_heap(&mut self) {
assert!(!self.table() && !self.vmctx());
self.set(FlagBit::Heap);
}

/// Set the `heap` bit, returning new flags.
pub fn with_heap(mut self) -> Self {
self.set_heap();
self
}

/// Test if the `table` bit is set.
///
/// Loads and stores with this flag accesses the "table" part of
/// abstract state. This is disjoint from the "heap", "vmctx",
/// and "other" parts of abstract state. In concrete terms, this
/// means that behavior is undefined if the same memory is also
/// accessed by another load/store with one of the other
/// alias-analysis bits (`heap`, `vmctx`) set, or `table` not set.
pub fn table(self) -> bool {
self.read(FlagBit::Table)
}

/// Set the `table` bit. See the notes about mutual exclusion with
/// other bits in `table()`.
pub fn set_table(&mut self) {
assert!(!self.heap() && !self.vmctx());
self.set(FlagBit::Table);
}

/// Set the `table` bit, returning new flags.
pub fn with_table(mut self) -> Self {
self.set_table();
self
}

/// Test if the `vmctx` bit is set.
///
/// Loads and stores with this flag accesses the "vmctx" part of
/// abstract state. This is disjoint from the "heap", "table",
/// and "other" parts of abstract state. In concrete terms, this
/// means that behavior is undefined if the same memory is also
/// accessed by another load/store with one of the other
/// alias-analysis bits (`heap`, `table`) set, or `vmctx` not set.
pub fn vmctx(self) -> bool {
self.read(FlagBit::Vmctx)
}

/// Set the `vmctx` bit. See the notes about mutual exclusion with
/// other bits in `vmctx()`.
pub fn set_vmctx(&mut self) {
assert!(!self.heap() && !self.table());
self.set(FlagBit::Vmctx);
}

/// Set the `vmctx` bit, returning new flags.
pub fn with_vmctx(mut self) -> Self {
self.set_vmctx();
self
}
}

impl fmt::Display for MemFlags {
Expand Down
1 change: 1 addition & 0 deletions cranelift/codegen/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ pub use crate::entity::packed_option;
pub use crate::machinst::buffer::{MachCallSite, MachReloc, MachSrcLoc, MachStackMap, MachTrap};
pub use crate::machinst::TextSectionBuilder;

mod alias_analysis;
mod bitset;
mod constant_hash;
mod context;
Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/machinst/blockorder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@

use crate::entity::SecondaryMap;
use crate::fx::{FxHashMap, FxHashSet};
use crate::inst_predicates::visit_block_succs;
use crate::ir::{Block, Function, Inst, Opcode};
use crate::machinst::lower::visit_block_succs;
use crate::machinst::*;

use smallvec::SmallVec;
Expand Down
27 changes: 0 additions & 27 deletions cranelift/codegen/src/machinst/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ use crate::data_value::DataValue;
use crate::entity::SecondaryMap;
use crate::fx::{FxHashMap, FxHashSet};
use crate::inst_predicates::{has_lowering_side_effect, is_constant_64bit};
use crate::ir::instructions::BranchInfo;
use crate::ir::{
types::{FFLAGS, IFLAGS},
ArgumentPurpose, Block, Constant, ConstantData, DataFlowGraph, ExternalName, Function,
Expand Down Expand Up @@ -1491,29 +1490,3 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
self.vcode.set_vreg_alias(from, to);
}
}

/// Visit all successors of a block with a given visitor closure.
pub(crate) fn visit_block_succs<F: FnMut(Inst, Block)>(f: &Function, block: Block, mut visit: F) {
for inst in f.layout.block_likely_branches(block) {
if f.dfg[inst].opcode().is_branch() {
visit_branch_targets(f, inst, &mut visit);
}
}
}

fn visit_branch_targets<F: FnMut(Inst, Block)>(f: &Function, inst: Inst, visit: &mut F) {
match f.dfg[inst].analyze_branch(&f.dfg.value_lists) {
BranchInfo::NotABranch => {}
BranchInfo::SingleDest(dest, _) => {
visit(inst, dest);
}
BranchInfo::Table(table, maybe_dest) => {
if let Some(dest) = maybe_dest {
visit(inst, dest);
}
for &dest in f.jump_tables[table].as_slice() {
visit(inst, dest);
}
}
}
}
22 changes: 22 additions & 0 deletions cranelift/filetests/filetests/alias/categories.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
test alias-analysis
set opt_level=speed
target aarch64

;; Check that aliasing properly respects the last store in each
;; "category" separately.

function %f0(i64, i64) -> i32, i32 {

block0(v0: i64, v1: i64):
v2 = iconst.i32 42
v3 = iconst.i32 43
store.i32 heap v2, v0+8
store.i32 table v3, v1+8

v4 = load.i32 heap v0+8
v5 = load.i32 table v1+8
; check: v4 -> v2
; check: v5 -> v3

return v4, v5
}
44 changes: 44 additions & 0 deletions cranelift/filetests/filetests/alias/extends.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
test alias-analysis
set opt_level=speed
target aarch64

;; Test that extension modes are properly accounted for when deciding
;; whether loads alias.

function %f0(i64 vmctx, i32) -> i32, i32, i32, i64, i64, i64 {
gv0 = vmctx
gv1 = load.i64 notrap readonly aligned gv0+8
heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32

block0(v0: i64, v1: i32):
v2 = heap_addr.i64 heap0, v1, 0

;; Initial load. This will not be reused by anything below, even
;; though it does access the same address.
v3 = load.i32 v2+8

;; These loads must remain (must not be removed as redundant).
v4 = uload8.i32 v2+8
; check: v4 = uload8.i32 v2+8
v5 = sload8.i32 v2+8
; check: v5 = sload8.i32 v2+8
v6 = load.i64 v2+8
; check: v6 = load.i64 v2+8

;; 8-bit store only partially overwrites the address.
istore8 v6, v2+8

;; This must not pick up the store data.
v7 = load.i64 v2+8
; check: v7 = load.i64 v2+8

;; Another store, this one non-truncating but actually using an
;; `i8` value.
v8 = iconst.i8 123
store.i8 v8, v2+8

v9 = load.i64 v2+8
; check: v9 = load.i64 v2+8

return v3, v4, v5, v6, v7, v9
}
Loading