From 8d907ce7f2a72e7ba2356fcc7121e31a135a76ad Mon Sep 17 00:00:00 2001 From: Jamey Sharp Date: Tue, 20 Feb 2024 11:48:10 -0800 Subject: [PATCH] cranelift: Enable "chaos mode" in egraph pass First of all, thread a "chaos mode" control-plane into Context::optimize and from there into EgraphPass, OptimizeCtx, and Elaborator. In this commit we use the control-plane to change the following behaviors in ways which shouldn't cause incorrect results: - Dominator-tree block traversal order for both the rule application and elaboration passes - Order of evaluating optimization alternatives from `simplify` - Choose worst values instead of best in each eclass Co-authored-by: L. Pereira --- cranelift/codegen/src/context.rs | 17 ++++-- cranelift/codegen/src/egraph.rs | 74 +++++++++++------------ cranelift/codegen/src/egraph/elaborate.rs | 37 +++++++++++- cranelift/filetests/src/test_optimize.rs | 3 +- cranelift/filetests/src/test_wasm.rs | 3 +- cranelift/src/souper_harvest.rs | 3 +- fuzz/fuzz_targets/cranelift-fuzzgen.rs | 8 ++- 7 files changed, 94 insertions(+), 51 deletions(-) diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs index 7ea5a1513782..cc9719e5f46f 100644 --- a/cranelift/codegen/src/context.rs +++ b/cranelift/codegen/src/context.rs @@ -141,7 +141,7 @@ impl Context { self.verify_if(isa)?; - self.optimize(isa)?; + self.optimize(isa, ctrl_plane)?; isa.compile_function(&self.func, &self.domtree, self.want_disasm, ctrl_plane) } @@ -151,7 +151,11 @@ impl Context { /// allocation. /// /// Public only for testing purposes. - pub fn optimize(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { + pub fn optimize( + &mut self, + isa: &dyn TargetIsa, + ctrl_plane: &mut ControlPlane, + ) -> CodegenResult<()> { log::debug!( "Number of CLIF instructions to optimize: {}", self.func.dfg.num_insts() @@ -185,7 +189,7 @@ impl Context { self.remove_constant_phis(isa)?; if opt_level != OptLevel::None { - self.egraph_pass(isa)?; + self.egraph_pass(isa, ctrl_plane)?; } Ok(()) @@ -347,7 +351,11 @@ impl Context { } /// Run optimizations via the egraph infrastructure. - pub fn egraph_pass<'a, FOI>(&mut self, fisa: FOI) -> CodegenResult<()> + pub fn egraph_pass<'a, FOI>( + &mut self, + fisa: FOI, + ctrl_plane: &mut ControlPlane, + ) -> CodegenResult<()> where FOI: Into>, { @@ -364,6 +372,7 @@ impl Context { &self.domtree, &self.loop_analysis, &mut alias_analysis, + ctrl_plane, ); pass.run(); log::debug!("egraph stats: {:?}", pass.stats); diff --git a/cranelift/codegen/src/egraph.rs b/cranelift/codegen/src/egraph.rs index fd89743951b5..cd9adcba800b 100644 --- a/cranelift/codegen/src/egraph.rs +++ b/cranelift/codegen/src/egraph.rs @@ -15,6 +15,7 @@ use crate::opts::IsleContext; use crate::scoped_hash_map::{Entry as ScopedEntry, ScopedHashMap}; use crate::trace; use crate::unionfind::UnionFind; +use cranelift_control::ControlPlane; use cranelift_entity::packed_option::ReservedValue; use cranelift_entity::SecondaryMap; use smallvec::SmallVec; @@ -53,6 +54,9 @@ pub struct EgraphPass<'a> { /// Loop analysis results, used for built-in LICM during /// elaboration. loop_analysis: &'a LoopAnalysis, + /// Chaos-mode control-plane so we can test that we still get + /// correct results when our heuristics make bad decisions. + ctrl_plane: &'a mut ControlPlane, /// Which canonical Values do we want to rematerialize in each /// block where they're used? /// @@ -84,6 +88,7 @@ where pub(crate) stats: &'opt mut Stats, pub(crate) alias_analysis: &'opt mut AliasAnalysis<'analysis>, pub(crate) alias_analysis_state: &'opt mut LastStores, + ctrl_plane: &'opt mut ControlPlane, // Held locally during optimization of one node (recursively): pub(crate) rewrite_depth: usize, pub(crate) subsume_values: FxHashSet, @@ -217,7 +222,6 @@ where let orig_value = self.func.dfg.first_result(inst); let mut optimized_values = std::mem::take(&mut self.optimized_values); - let mut isle_ctx = IsleContext { ctx: self }; // Limit rewrite depth. When we apply optimization rules, they // may create new nodes (values) and those are, recursively, @@ -229,23 +233,20 @@ where // infinite or problematic recursion, we bound the rewrite // depth to a small constant here. const REWRITE_LIMIT: usize = 5; - if isle_ctx.ctx.rewrite_depth > REWRITE_LIMIT { - isle_ctx.ctx.stats.rewrite_depth_limit += 1; + if self.rewrite_depth > REWRITE_LIMIT { + self.stats.rewrite_depth_limit += 1; return orig_value; } - isle_ctx.ctx.rewrite_depth += 1; - trace!( - "Incrementing rewrite depth; now {}", - isle_ctx.ctx.rewrite_depth - ); + self.rewrite_depth += 1; + trace!("Incrementing rewrite depth; now {}", self.rewrite_depth); // Invoke the ISLE toplevel constructor, getting all new // values produced as equivalents to this value. trace!("Calling into ISLE with original value {}", orig_value); - isle_ctx.ctx.stats.rewrite_rule_invoked += 1; + self.stats.rewrite_rule_invoked += 1; debug_assert!(optimized_values.is_empty()); crate::opts::generated_code::constructor_simplify( - &mut isle_ctx, + &mut IsleContext { ctx: self }, orig_value, &mut optimized_values, ); @@ -253,6 +254,10 @@ where " -> returned from ISLE, generated {} optimized values", optimized_values.len() ); + + // It's not supposed to matter what order `simplify` returns values in. + self.ctrl_plane.shuffle(&mut optimized_values); + if optimized_values.len() > MATCHES_LIMIT { trace!("Reached maximum matches limit; too many optimized values, ignoring rest."); optimized_values.truncate(MATCHES_LIMIT); @@ -272,45 +277,30 @@ where trace!(" -> same as orig value; skipping"); continue; } - if isle_ctx.ctx.subsume_values.contains(&optimized_value) { + if self.subsume_values.contains(&optimized_value) { // Merge in the unionfind so canonicalization // still works, but take *only* the subsuming // value, and break now. - isle_ctx.ctx.eclasses.union(optimized_value, union_value); - isle_ctx - .ctx - .func - .dfg - .merge_facts(optimized_value, union_value); + self.eclasses.union(optimized_value, union_value); + self.func.dfg.merge_facts(optimized_value, union_value); union_value = optimized_value; break; } let old_union_value = union_value; - union_value = isle_ctx - .ctx - .func - .dfg - .union(old_union_value, optimized_value); - isle_ctx.ctx.stats.union += 1; + union_value = self.func.dfg.union(old_union_value, optimized_value); + self.stats.union += 1; trace!(" -> union: now {}", union_value); - isle_ctx.ctx.eclasses.add(union_value); - isle_ctx - .ctx - .eclasses - .union(old_union_value, optimized_value); - isle_ctx - .ctx - .func - .dfg - .merge_facts(old_union_value, optimized_value); - isle_ctx.ctx.eclasses.union(old_union_value, union_value); + self.eclasses.add(union_value); + self.eclasses.union(old_union_value, optimized_value); + self.func.dfg.merge_facts(old_union_value, optimized_value); + self.eclasses.union(old_union_value, union_value); } - isle_ctx.ctx.rewrite_depth -= 1; + self.rewrite_depth -= 1; - debug_assert!(isle_ctx.ctx.optimized_values.is_empty()); - isle_ctx.ctx.optimized_values = optimized_values; + debug_assert!(self.optimized_values.is_empty()); + self.optimized_values = optimized_values; union_value } @@ -400,6 +390,7 @@ impl<'a> EgraphPass<'a> { raw_domtree: &'a DominatorTree, loop_analysis: &'a LoopAnalysis, alias_analysis: &'a mut AliasAnalysis<'a>, + ctrl_plane: &'a mut ControlPlane, ) -> Self { let num_values = func.dfg.num_values(); let mut domtree = DominatorTreePreorder::new(); @@ -409,6 +400,7 @@ impl<'a> EgraphPass<'a> { domtree, loop_analysis, alias_analysis, + ctrl_plane, stats: Stats::default(), eclasses: UnionFind::with_capacity(num_values), remat_values: FxHashSet::default(), @@ -505,7 +497,11 @@ impl<'a> EgraphPass<'a> { // We popped this block; push children // immediately, then process this block. block_stack.push(StackEntry::Pop); - block_stack.extend(self.domtree.children(block).map(StackEntry::Visit)); + block_stack.extend( + self.ctrl_plane + .shuffled(self.domtree.children(block)) + .map(StackEntry::Visit), + ); effectful_gvn_map.increment_depth(); trace!("Processing block {}", block); @@ -557,6 +553,7 @@ impl<'a> EgraphPass<'a> { stats: &mut self.stats, alias_analysis: self.alias_analysis, alias_analysis_state: &mut alias_analysis_state, + ctrl_plane: self.ctrl_plane, optimized_values: Default::default(), }; @@ -614,6 +611,7 @@ impl<'a> EgraphPass<'a> { self.loop_analysis, &mut self.remat_values, &mut self.stats, + self.ctrl_plane, ); elaborator.elaborate(); diff --git a/cranelift/codegen/src/egraph/elaborate.rs b/cranelift/codegen/src/egraph/elaborate.rs index 0e3ca285979d..f7aa002b1c48 100644 --- a/cranelift/codegen/src/egraph/elaborate.rs +++ b/cranelift/codegen/src/egraph/elaborate.rs @@ -12,6 +12,7 @@ use crate::loop_analysis::{Loop, LoopAnalysis}; use crate::scoped_hash_map::ScopedHashMap; use crate::trace; use alloc::vec::Vec; +use cranelift_control::ControlPlane; use cranelift_entity::{packed_option::ReservedValue, SecondaryMap}; use smallvec::{smallvec, SmallVec}; @@ -64,6 +65,9 @@ pub(crate) struct Elaborator<'a> { /// Stats for various events during egraph processing, to help /// with optimization of this infrastructure. stats: &'a mut Stats, + /// Chaos-mode control-plane so we can test that we still get + /// correct results when our heuristics make bad decisions. + ctrl_plane: &'a mut ControlPlane, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -139,6 +143,7 @@ impl<'a> Elaborator<'a> { loop_analysis: &'a LoopAnalysis, remat_values: &'a FxHashSet, stats: &'a mut Stats, + ctrl_plane: &'a mut ControlPlane, ) -> Self { let num_values = func.dfg.num_values(); let mut value_to_best_value = @@ -158,6 +163,7 @@ impl<'a> Elaborator<'a> { block_stack: vec![], remat_copies: FxHashMap::default(), stats, + ctrl_plane, } } @@ -215,6 +221,14 @@ impl<'a> Elaborator<'a> { fn compute_best_values(&mut self) { let best = &mut self.value_to_best_value; + // We can't make random decisions inside the fixpoint loop below because + // that could cause values to change on every iteration of the loop, + // which would make the loop never terminate. So in chaos testing + // mode we need a form of making suboptimal decisions that is fully + // deterministic. We choose to simply make the worst decision we know + // how to do instead of the best. + let use_worst = self.ctrl_plane.get_decision(); + // Do a fixpoint loop to compute the best value for each eclass. // // The maximum number of iterations is the length of the longest chain @@ -227,7 +241,14 @@ impl<'a> Elaborator<'a> { // which we don't really care about, or (b) the CLIF producer did // something weird, in which case it is their responsibility to stop // doing that. - trace!("Entering fixpoint loop to compute the best values for each eclass"); + trace!( + "Entering fixpoint loop to compute the {} values for each eclass", + if use_worst { + "worst (chaos mode)" + } else { + "best" + } + ); let mut keep_going = true; while keep_going { keep_going = false; @@ -248,7 +269,17 @@ impl<'a> Elaborator<'a> { // is a `(cost, value)` tuple; `cost` comes first so // the natural comparison works based on cost, and // breaks ties based on value number. - best[value] = std::cmp::min(best[x], best[y]); + best[value] = if use_worst { + if best[x].1.is_reserved_value() { + best[y] + } else if best[y].1.is_reserved_value() { + best[x] + } else { + std::cmp::max(best[x], best[y]) + } + } else { + std::cmp::min(best[x], best[y]) + }; trace!( " -> best of union({:?}, {:?}) = {:?}", best[x], @@ -776,7 +807,7 @@ impl<'a> Elaborator<'a> { // traversal so we do this after processing this // block above. let block_stack_end = self.block_stack.len(); - for child in domtree.children(block) { + for child in self.ctrl_plane.shuffled(domtree.children(block)) { self.block_stack.push(BlockStackEntry::Elaborate { block: child, idom: Some(block), diff --git a/cranelift/filetests/src/test_optimize.rs b/cranelift/filetests/src/test_optimize.rs index fd76a8fe041f..4550d5ab037d 100644 --- a/cranelift/filetests/src/test_optimize.rs +++ b/cranelift/filetests/src/test_optimize.rs @@ -10,6 +10,7 @@ use crate::subtest::{check_precise_output, run_filecheck, Context, SubTest}; use anyhow::Result; use cranelift_codegen::ir; +use cranelift_control::ControlPlane; use cranelift_reader::{TestCommand, TestOption}; use std::borrow::Cow; @@ -53,7 +54,7 @@ impl SubTest for TestOptimize { let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); comp_ctx - .optimize(isa) + .optimize(isa, &mut ControlPlane::default()) .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, e))?; let clif = format!("{:?}", comp_ctx.func); diff --git a/cranelift/filetests/src/test_wasm.rs b/cranelift/filetests/src/test_wasm.rs index c3bcc25ca3f0..fe3b0b082974 100644 --- a/cranelift/filetests/src/test_wasm.rs +++ b/cranelift/filetests/src/test_wasm.rs @@ -5,6 +5,7 @@ mod env; use anyhow::{bail, ensure, Context, Result}; use config::TestConfig; +use cranelift_control::ControlPlane; use env::ModuleEnv; use similar::TextDiff; use std::{fmt::Write, path::Path}; @@ -64,7 +65,7 @@ pub fn run(path: &Path, wat: &str) -> Result<()> { writeln!(&mut actual, "{}", code.vcode.as_ref().unwrap()).unwrap(); } else if config.optimize { let mut ctx = cranelift_codegen::Context::for_function(func.clone()); - ctx.optimize(isa) + ctx.optimize(isa, &mut ControlPlane::default()) .map_err(|e| crate::pretty_anyhow_error(&ctx.func, e))?; writeln!(&mut actual, "{}", ctx.func.display()).unwrap(); } else { diff --git a/cranelift/src/souper_harvest.rs b/cranelift/src/souper_harvest.rs index f1d1c11f37cd..50b55c2adaa5 100644 --- a/cranelift/src/souper_harvest.rs +++ b/cranelift/src/souper_harvest.rs @@ -1,5 +1,6 @@ use anyhow::{Context as _, Result}; use clap::Parser; +use cranelift_codegen::control::ControlPlane; use cranelift_codegen::Context; use cranelift_reader::parse_sets_and_triple; use cranelift_wasm::DummyEnvironment; @@ -133,7 +134,7 @@ pub fn run(options: &Options) -> Result<()> { let mut ctx = Context::new(); ctx.func = func; - ctx.optimize(fisa.isa.unwrap()) + ctx.optimize(fisa.isa.unwrap(), &mut ControlPlane::default()) .context("failed to run optimizations")?; ctx.souper_harvest(send) diff --git a/fuzz/fuzz_targets/cranelift-fuzzgen.rs b/fuzz/fuzz_targets/cranelift-fuzzgen.rs index c591fcaa6fe1..c3c1b19d76cf 100644 --- a/fuzz/fuzz_targets/cranelift-fuzzgen.rs +++ b/fuzz/fuzz_targets/cranelift-fuzzgen.rs @@ -236,12 +236,14 @@ impl TestCase { } fn to_optimized(&self) -> Self { + let mut ctrl_planes = self.ctrl_planes.clone(); let optimized_functions: Vec = self .functions .iter() - .map(|func| { + .zip(ctrl_planes.iter_mut()) + .map(|(func, ctrl_plane)| { let mut ctx = Context::for_function(func.clone()); - ctx.optimize(self.isa.as_ref()).unwrap(); + ctx.optimize(self.isa.as_ref(), ctrl_plane).unwrap(); ctx.func }) .collect(); @@ -249,7 +251,7 @@ impl TestCase { TestCase { isa: self.isa.clone(), functions: optimized_functions, - ctrl_planes: self.ctrl_planes.clone(), + ctrl_planes, inputs: self.inputs.clone(), compare_against_host: false, }