Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cranelift: Enable "chaos mode" in egraph pass #7968

Merged
merged 1 commit into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions cranelift/codegen/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ impl Context {

self.verify_if(isa)?;

self.optimize(isa)?;
self.optimize(isa, ctrl_plane)?;

isa.compile_function(&self.func, &self.domtree, self.want_disasm, ctrl_plane)
}
Expand All @@ -151,7 +151,11 @@ impl Context {
/// allocation.
///
/// Public only for testing purposes.
pub fn optimize(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
pub fn optimize(
&mut self,
isa: &dyn TargetIsa,
ctrl_plane: &mut ControlPlane,
) -> CodegenResult<()> {
log::debug!(
"Number of CLIF instructions to optimize: {}",
self.func.dfg.num_insts()
Expand Down Expand Up @@ -185,7 +189,7 @@ impl Context {
self.remove_constant_phis(isa)?;

if opt_level != OptLevel::None {
self.egraph_pass(isa)?;
self.egraph_pass(isa, ctrl_plane)?;
}

Ok(())
Expand Down Expand Up @@ -347,7 +351,11 @@ impl Context {
}

/// Run optimizations via the egraph infrastructure.
pub fn egraph_pass<'a, FOI>(&mut self, fisa: FOI) -> CodegenResult<()>
pub fn egraph_pass<'a, FOI>(
&mut self,
fisa: FOI,
ctrl_plane: &mut ControlPlane,
) -> CodegenResult<()>
where
FOI: Into<FlagsOrIsa<'a>>,
{
Expand All @@ -364,6 +372,7 @@ impl Context {
&self.domtree,
&self.loop_analysis,
&mut alias_analysis,
ctrl_plane,
);
pass.run();
log::debug!("egraph stats: {:?}", pass.stats);
Expand Down
74 changes: 36 additions & 38 deletions cranelift/codegen/src/egraph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use crate::opts::IsleContext;
use crate::scoped_hash_map::{Entry as ScopedEntry, ScopedHashMap};
use crate::trace;
use crate::unionfind::UnionFind;
use cranelift_control::ControlPlane;
use cranelift_entity::packed_option::ReservedValue;
use cranelift_entity::SecondaryMap;
use smallvec::SmallVec;
Expand Down Expand Up @@ -53,6 +54,9 @@ pub struct EgraphPass<'a> {
/// Loop analysis results, used for built-in LICM during
/// elaboration.
loop_analysis: &'a LoopAnalysis,
/// Chaos-mode control-plane so we can test that we still get
/// correct results when our heuristics make bad decisions.
ctrl_plane: &'a mut ControlPlane,
/// Which canonical Values do we want to rematerialize in each
/// block where they're used?
///
Expand Down Expand Up @@ -84,6 +88,7 @@ where
pub(crate) stats: &'opt mut Stats,
pub(crate) alias_analysis: &'opt mut AliasAnalysis<'analysis>,
pub(crate) alias_analysis_state: &'opt mut LastStores,
ctrl_plane: &'opt mut ControlPlane,
// Held locally during optimization of one node (recursively):
pub(crate) rewrite_depth: usize,
pub(crate) subsume_values: FxHashSet<Value>,
Expand Down Expand Up @@ -217,7 +222,6 @@ where
let orig_value = self.func.dfg.first_result(inst);

let mut optimized_values = std::mem::take(&mut self.optimized_values);
let mut isle_ctx = IsleContext { ctx: self };

// Limit rewrite depth. When we apply optimization rules, they
// may create new nodes (values) and those are, recursively,
Expand All @@ -229,30 +233,31 @@ where
// infinite or problematic recursion, we bound the rewrite
// depth to a small constant here.
const REWRITE_LIMIT: usize = 5;
if isle_ctx.ctx.rewrite_depth > REWRITE_LIMIT {
isle_ctx.ctx.stats.rewrite_depth_limit += 1;
if self.rewrite_depth > REWRITE_LIMIT {
self.stats.rewrite_depth_limit += 1;
return orig_value;
}
isle_ctx.ctx.rewrite_depth += 1;
trace!(
"Incrementing rewrite depth; now {}",
isle_ctx.ctx.rewrite_depth
);
self.rewrite_depth += 1;
trace!("Incrementing rewrite depth; now {}", self.rewrite_depth);

// Invoke the ISLE toplevel constructor, getting all new
// values produced as equivalents to this value.
trace!("Calling into ISLE with original value {}", orig_value);
isle_ctx.ctx.stats.rewrite_rule_invoked += 1;
self.stats.rewrite_rule_invoked += 1;
debug_assert!(optimized_values.is_empty());
crate::opts::generated_code::constructor_simplify(
&mut isle_ctx,
&mut IsleContext { ctx: self },
orig_value,
&mut optimized_values,
);
trace!(
" -> returned from ISLE, generated {} optimized values",
optimized_values.len()
);

// It's not supposed to matter what order `simplify` returns values in.
self.ctrl_plane.shuffle(&mut optimized_values);

if optimized_values.len() > MATCHES_LIMIT {
trace!("Reached maximum matches limit; too many optimized values, ignoring rest.");
optimized_values.truncate(MATCHES_LIMIT);
Expand All @@ -272,45 +277,30 @@ where
trace!(" -> same as orig value; skipping");
continue;
}
if isle_ctx.ctx.subsume_values.contains(&optimized_value) {
if self.subsume_values.contains(&optimized_value) {
// Merge in the unionfind so canonicalization
// still works, but take *only* the subsuming
// value, and break now.
isle_ctx.ctx.eclasses.union(optimized_value, union_value);
isle_ctx
.ctx
.func
.dfg
.merge_facts(optimized_value, union_value);
self.eclasses.union(optimized_value, union_value);
self.func.dfg.merge_facts(optimized_value, union_value);
union_value = optimized_value;
break;
}

let old_union_value = union_value;
union_value = isle_ctx
.ctx
.func
.dfg
.union(old_union_value, optimized_value);
isle_ctx.ctx.stats.union += 1;
union_value = self.func.dfg.union(old_union_value, optimized_value);
self.stats.union += 1;
trace!(" -> union: now {}", union_value);
isle_ctx.ctx.eclasses.add(union_value);
isle_ctx
.ctx
.eclasses
.union(old_union_value, optimized_value);
isle_ctx
.ctx
.func
.dfg
.merge_facts(old_union_value, optimized_value);
isle_ctx.ctx.eclasses.union(old_union_value, union_value);
self.eclasses.add(union_value);
self.eclasses.union(old_union_value, optimized_value);
self.func.dfg.merge_facts(old_union_value, optimized_value);
self.eclasses.union(old_union_value, union_value);
}

isle_ctx.ctx.rewrite_depth -= 1;
self.rewrite_depth -= 1;

debug_assert!(isle_ctx.ctx.optimized_values.is_empty());
isle_ctx.ctx.optimized_values = optimized_values;
debug_assert!(self.optimized_values.is_empty());
self.optimized_values = optimized_values;

union_value
}
Expand Down Expand Up @@ -400,6 +390,7 @@ impl<'a> EgraphPass<'a> {
raw_domtree: &'a DominatorTree,
loop_analysis: &'a LoopAnalysis,
alias_analysis: &'a mut AliasAnalysis<'a>,
ctrl_plane: &'a mut ControlPlane,
) -> Self {
let num_values = func.dfg.num_values();
let mut domtree = DominatorTreePreorder::new();
Expand All @@ -409,6 +400,7 @@ impl<'a> EgraphPass<'a> {
domtree,
loop_analysis,
alias_analysis,
ctrl_plane,
stats: Stats::default(),
eclasses: UnionFind::with_capacity(num_values),
remat_values: FxHashSet::default(),
Expand Down Expand Up @@ -505,7 +497,11 @@ impl<'a> EgraphPass<'a> {
// We popped this block; push children
// immediately, then process this block.
block_stack.push(StackEntry::Pop);
block_stack.extend(self.domtree.children(block).map(StackEntry::Visit));
block_stack.extend(
self.ctrl_plane
.shuffled(self.domtree.children(block))
.map(StackEntry::Visit),
);
effectful_gvn_map.increment_depth();

trace!("Processing block {}", block);
Expand Down Expand Up @@ -557,6 +553,7 @@ impl<'a> EgraphPass<'a> {
stats: &mut self.stats,
alias_analysis: self.alias_analysis,
alias_analysis_state: &mut alias_analysis_state,
ctrl_plane: self.ctrl_plane,
optimized_values: Default::default(),
};

Expand Down Expand Up @@ -614,6 +611,7 @@ impl<'a> EgraphPass<'a> {
self.loop_analysis,
&mut self.remat_values,
&mut self.stats,
self.ctrl_plane,
);
elaborator.elaborate();

Expand Down
37 changes: 34 additions & 3 deletions cranelift/codegen/src/egraph/elaborate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use crate::loop_analysis::{Loop, LoopAnalysis};
use crate::scoped_hash_map::ScopedHashMap;
use crate::trace;
use alloc::vec::Vec;
use cranelift_control::ControlPlane;
use cranelift_entity::{packed_option::ReservedValue, SecondaryMap};
use smallvec::{smallvec, SmallVec};

Expand Down Expand Up @@ -64,6 +65,9 @@ pub(crate) struct Elaborator<'a> {
/// Stats for various events during egraph processing, to help
/// with optimization of this infrastructure.
stats: &'a mut Stats,
/// Chaos-mode control-plane so we can test that we still get
/// correct results when our heuristics make bad decisions.
ctrl_plane: &'a mut ControlPlane,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
Expand Down Expand Up @@ -139,6 +143,7 @@ impl<'a> Elaborator<'a> {
loop_analysis: &'a LoopAnalysis,
remat_values: &'a FxHashSet<Value>,
stats: &'a mut Stats,
ctrl_plane: &'a mut ControlPlane,
) -> Self {
let num_values = func.dfg.num_values();
let mut value_to_best_value =
Expand All @@ -158,6 +163,7 @@ impl<'a> Elaborator<'a> {
block_stack: vec![],
remat_copies: FxHashMap::default(),
stats,
ctrl_plane,
}
}

Expand Down Expand Up @@ -215,6 +221,14 @@ impl<'a> Elaborator<'a> {
fn compute_best_values(&mut self) {
let best = &mut self.value_to_best_value;

// We can't make random decisions inside the fixpoint loop below because
// that could cause values to change on every iteration of the loop,
// which would make the loop never terminate. So in chaos testing
// mode we need a form of making suboptimal decisions that is fully
// deterministic. We choose to simply make the worst decision we know
// how to do instead of the best.
let use_worst = self.ctrl_plane.get_decision();

// Do a fixpoint loop to compute the best value for each eclass.
//
// The maximum number of iterations is the length of the longest chain
Expand All @@ -227,7 +241,14 @@ impl<'a> Elaborator<'a> {
// which we don't really care about, or (b) the CLIF producer did
// something weird, in which case it is their responsibility to stop
// doing that.
trace!("Entering fixpoint loop to compute the best values for each eclass");
trace!(
"Entering fixpoint loop to compute the {} values for each eclass",
if use_worst {
"worst (chaos mode)"
} else {
"best"
}
);
let mut keep_going = true;
while keep_going {
keep_going = false;
Expand All @@ -248,7 +269,17 @@ impl<'a> Elaborator<'a> {
// is a `(cost, value)` tuple; `cost` comes first so
// the natural comparison works based on cost, and
// breaks ties based on value number.
best[value] = std::cmp::min(best[x], best[y]);
best[value] = if use_worst {
Copy link
Member

@cfallin cfallin Feb 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if there' a way to encapsulate this? I realize it's a bit tricky because the order relation actually changes based on use_worst (reserved values come "last" when picking mins, and "first" when picking maxes). Maybe something like pick_non_reserved_with_cmp(|x, y| x <= y) or something like that, then picked_non_reserved_with_cmp takes two (Cost, Value) tuples and has the if-ladder?

Maybe that's just the same complexity as before, I dunno; just code-golfing a bit to see if we can make it prettier :-)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We discussed this briefly while writing, and decided on this as it's more explicit and it's used only here.

if best[x].1.is_reserved_value() {
best[y]
} else if best[y].1.is_reserved_value() {
best[x]
} else {
std::cmp::max(best[x], best[y])
}
} else {
std::cmp::min(best[x], best[y])
};
trace!(
" -> best of union({:?}, {:?}) = {:?}",
best[x],
Expand Down Expand Up @@ -776,7 +807,7 @@ impl<'a> Elaborator<'a> {
// traversal so we do this after processing this
// block above.
let block_stack_end = self.block_stack.len();
for child in domtree.children(block) {
for child in self.ctrl_plane.shuffled(domtree.children(block)) {
self.block_stack.push(BlockStackEntry::Elaborate {
block: child,
idom: Some(block),
Expand Down
3 changes: 2 additions & 1 deletion cranelift/filetests/src/test_optimize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
use crate::subtest::{check_precise_output, run_filecheck, Context, SubTest};
use anyhow::Result;
use cranelift_codegen::ir;
use cranelift_control::ControlPlane;
use cranelift_reader::{TestCommand, TestOption};
use std::borrow::Cow;

Expand Down Expand Up @@ -53,7 +54,7 @@ impl SubTest for TestOptimize {
let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned());

comp_ctx
.optimize(isa)
.optimize(isa, &mut ControlPlane::default())
.map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, e))?;

let clif = format!("{:?}", comp_ctx.func);
Expand Down
3 changes: 2 additions & 1 deletion cranelift/filetests/src/test_wasm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ mod env;

use anyhow::{bail, ensure, Context, Result};
use config::TestConfig;
use cranelift_control::ControlPlane;
use env::ModuleEnv;
use similar::TextDiff;
use std::{fmt::Write, path::Path};
Expand Down Expand Up @@ -64,7 +65,7 @@ pub fn run(path: &Path, wat: &str) -> Result<()> {
writeln!(&mut actual, "{}", code.vcode.as_ref().unwrap()).unwrap();
} else if config.optimize {
let mut ctx = cranelift_codegen::Context::for_function(func.clone());
ctx.optimize(isa)
ctx.optimize(isa, &mut ControlPlane::default())
.map_err(|e| crate::pretty_anyhow_error(&ctx.func, e))?;
writeln!(&mut actual, "{}", ctx.func.display()).unwrap();
} else {
Expand Down
3 changes: 2 additions & 1 deletion cranelift/src/souper_harvest.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use anyhow::{Context as _, Result};
use clap::Parser;
use cranelift_codegen::control::ControlPlane;
use cranelift_codegen::Context;
use cranelift_reader::parse_sets_and_triple;
use cranelift_wasm::DummyEnvironment;
Expand Down Expand Up @@ -133,7 +134,7 @@ pub fn run(options: &Options) -> Result<()> {
let mut ctx = Context::new();
ctx.func = func;

ctx.optimize(fisa.isa.unwrap())
ctx.optimize(fisa.isa.unwrap(), &mut ControlPlane::default())
.context("failed to run optimizations")?;

ctx.souper_harvest(send)
Expand Down
8 changes: 5 additions & 3 deletions fuzz/fuzz_targets/cranelift-fuzzgen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,20 +236,22 @@ impl TestCase {
}

fn to_optimized(&self) -> Self {
let mut ctrl_planes = self.ctrl_planes.clone();
let optimized_functions: Vec<Function> = self
.functions
.iter()
.map(|func| {
.zip(ctrl_planes.iter_mut())
.map(|(func, ctrl_plane)| {
let mut ctx = Context::for_function(func.clone());
ctx.optimize(self.isa.as_ref()).unwrap();
ctx.optimize(self.isa.as_ref(), ctrl_plane).unwrap();
ctx.func
})
.collect();

TestCase {
isa: self.isa.clone(),
functions: optimized_functions,
ctrl_planes: self.ctrl_planes.clone(),
ctrl_planes,
inputs: self.inputs.clone(),
compare_against_host: false,
}
Expand Down