chore(ssa refactor): Add pass to simplify the control flow graph (#1373)

* Add simplify_cfg pass * Amend comment * Remove block arguments for blocks with 1 predecessor * Add comment
noir-lang · May 22, 2023 · c4d8858 · c4d8858
1 parent a69e2c0
commit c4d8858
Show file tree

Hide file tree

Showing 5 changed files with 252 additions and 12 deletions.
diff --git a/crates/noirc_evaluator/src/ssa_refactor/ir/basic_block.rs b/crates/noirc_evaluator/src/ssa_refactor/ir/basic_block.rs
@@ -40,6 +40,11 @@ impl BasicBlock {
         &self.parameters
     }
 
+    /// Removes all the parameters of this block
+    pub(crate) fn take_parameters(&mut self) -> Vec<ValueId> {
+        std::mem::take(&mut self.parameters)
+    }
+
     /// Adds a parameter to this BasicBlock.
     /// Expects that the ValueId given should refer to a Value::Param
     /// instance with its position equal to self.parameters.len().
@@ -102,4 +107,23 @@ impl BasicBlock {
             });
         self.instructions.remove(index);
     }
+
+    /// Take ownership of this block's terminator, replacing it with an empty return terminator
+    /// so that no clone is needed.
+    ///
+    /// It is expected that this function is used as an optimization on blocks that are no longer
+    /// reachable or will have their terminator overwritten afterwards. Using this on a reachable
+    /// block without setting the terminator afterward will result in the empty return terminator
+    /// being kept, which is likely unwanted.
+    pub(crate) fn take_terminator(&mut self) -> TerminatorInstruction {
+        let terminator = self.terminator.as_mut().expect("Expected block to have a terminator");
+        std::mem::replace(terminator, TerminatorInstruction::Return { return_values: Vec::new() })
+    }
+
+    /// Returns a mutable reference to the terminator of this block.
+    ///
+    /// Once this block has finished construction, this is expected to always be Some.
+    pub(crate) fn unwrap_terminator_mut(&mut self) -> &mut TerminatorInstruction {
+        self.terminator.as_mut().expect("Expected block to have terminator instruction")
+    }
 }
diff --git a/crates/noirc_evaluator/src/ssa_refactor/ir/dfg.rs b/crates/noirc_evaluator/src/ssa_refactor/ir/dfg.rs
@@ -123,6 +123,21 @@ impl DataFlowGraph {
         self.values.insert(value)
     }
 
+    /// Replaces the value specified by the given ValueId with a new Value.
+    ///
+    /// This is the preferred method to call for optimizations simplifying
+    /// values since other instructions referring to the same ValueId need
+    /// not be modified to refer to a new ValueId.
+    pub(crate) fn set_value(&mut self, value_id: ValueId, new_value: Value) {
+        self.values[value_id] = new_value;
+    }
+
+    /// Set the value of value_to_replace to refer to the value referred to by new_value.
+    pub(crate) fn set_value_from_id(&mut self, value_to_replace: ValueId, new_value: ValueId) {
+        let new_value = self.values[new_value];
+        self.values[value_to_replace] = new_value;
+    }
+
     /// Creates a new constant value, or returns the Id to an existing one if
     /// one already exists.
     pub(crate) fn make_constant(&mut self, value: FieldElement, typ: Type) -> ValueId {
@@ -161,9 +176,7 @@ impl DataFlowGraph {
 
         // Get all of the types that this instruction produces
         // and append them as results.
-        let typs = self.instruction_result_types(instruction_id, ctrl_typevars);
-
-        for typ in typs {
+        for typ in self.instruction_result_types(instruction_id, ctrl_typevars) {
             self.append_result(instruction_id, typ);
         }
     }
@@ -269,15 +282,6 @@ impl DataFlowGraph {
     ) {
         self.blocks[block].set_terminator(terminator);
     }
-
-    /// Replaces the value specified by the given ValueId with a new Value.
-    ///
-    /// This is the preferred method to call for optimizations simplifying
-    /// values since other instructions referring to the same ValueId need
-    /// not be modified to refer to a new ValueId.
-    pub(crate) fn set_value(&mut self, value_id: ValueId, new_value: Value) {
-        self.values[value_id] = new_value;
-    }
 }
 
 impl std::ops::Index<InstructionId> for DataFlowGraph {

diff --git a/crates/noirc_evaluator/src/ssa_refactor/ir/function.rs b/crates/noirc_evaluator/src/ssa_refactor/ir/function.rs
@@ -1,3 +1,5 @@
+use std::collections::HashSet;
+
 use super::basic_block::BasicBlockId;
 use super::dfg::DataFlowGraph;
 use super::map::Id;
@@ -61,6 +63,23 @@ impl Function {
     pub(crate) fn parameters(&self) -> &[ValueId] {
         self.dfg.block_parameters(self.entry_block)
     }
+
+    /// Collects all the reachable blocks of this function.
+    ///
+    /// Note that self.dfg.basic_blocks_iter() iterates over all blocks,
+    /// whether reachable or not. This function should be used if you
+    /// want to iterate only reachable blocks.
+    pub(crate) fn reachable_blocks(&self) -> HashSet<BasicBlockId> {
+        let mut blocks = HashSet::new();
+        let mut stack = vec![self.entry_block];
+
+        while let Some(block) = stack.pop() {
+            if blocks.insert(block) {
+                stack.extend(self.dfg[block].successors());
+            }
+        }
+        blocks
+    }
 }
 
 /// FunctionId is a reference for a function

diff --git a/crates/noirc_evaluator/src/ssa_refactor/opt/mod.rs b/crates/noirc_evaluator/src/ssa_refactor/opt/mod.rs
@@ -5,3 +5,4 @@
 //! Generally, these passes are also expected to minimize the final amount of instructions.
 mod inlining;
 mod mem2reg;
+mod simplify_cfg;
diff --git a/crates/noirc_evaluator/src/ssa_refactor/opt/simplify_cfg.rs b/crates/noirc_evaluator/src/ssa_refactor/opt/simplify_cfg.rs
@@ -0,0 +1,192 @@
+//! This file contains the simplify cfg pass of the SSA IR.
+//!
+//! This is a rather simple pass that is expected to be cheap to perform. It:
+//! 1. Removes blocks with no predecessors
+//! 2. Inlines a block into its sole predecessor if that predecessor only has one successor.
+//! 3. Removes any block arguments for blocks with only a single predecessor.
+//! 4. Removes any blocks which have no instructions other than a single terminating jmp.
+//!
+//! Currently, only 2 and 3 are implemented.
+use std::collections::HashSet;
+
+use crate::ssa_refactor::{
+    ir::{
+        basic_block::BasicBlockId, cfg::ControlFlowGraph, function::Function,
+        instruction::TerminatorInstruction,
+    },
+    ssa_gen::Ssa,
+};
+
+impl Ssa {
+    /// Simplify each function's control flow graph by:
+    /// 1. Removing blocks with no predecessors
+    /// 2. Inlining a block into its sole predecessor if that predecessor only has one successor.
+    /// 3. Removing any block arguments for blocks with only a single predecessor.
+    /// 4. Removing any blocks which have no instructions other than a single terminating jmp.
+    ///
+    /// Currently, only 2 and 3 are implemented.
+    pub(crate) fn simplify_cfg(mut self) -> Self {
+        for function in self.functions.values_mut() {
+            simplify_function(function);
+        }
+        self
+    }
+}
+
+/// Simplify a function's cfg by going through each block to check for any simple blocks that can
+/// be inlined into their predecessor.
+fn simplify_function(function: &mut Function) {
+    let mut cfg = ControlFlowGraph::with_function(function);
+    let mut stack = vec![function.entry_block()];
+    let mut visited = HashSet::new();
+
+    while let Some(block) = stack.pop() {
+        if visited.insert(block) {
+            stack.extend(function.dfg[block].successors().filter(|block| !visited.contains(block)));
+        }
+
+        let mut predecessors = cfg.predecessors(block);
+
+        if predecessors.len() == 1 {
+            let predecessor = predecessors.next().expect("Already checked length of predecessors");
+            drop(predecessors);
+
+            // If the block has only 1 predecessor, we can safely remove its block parameters
+            remove_block_parameters(function, block, predecessor);
+
+            // Note: this function relies on `remove_block_parameters` being called first.
+            // Otherwise the inlined block will refer to parameters that no longer exist.
+            //
+            // If successful, `block` will be empty and unreachable after this call, so any
+            // optimizations performed after this point on the same block should check if
+            // the inlining here was successful before continuing.
+            try_inline_into_predecessor(function, &mut cfg, block, predecessor);
+        }
+    }
+}
+
+/// If the given block has block parameters, replace them with the jump arguments from the predecessor.
+///
+/// Currently, if this function is needed, `try_inline_into_predecessor` will also always apply,
+/// although in the future it is possible for only this function to apply if jmpif instructions
+/// with block arguments are ever added.
+fn remove_block_parameters(
+    function: &mut Function,
+    block: BasicBlockId,
+    predecessor: BasicBlockId,
+) {
+    let block = &mut function.dfg[block];
+
+    if !block.parameters().is_empty() {
+        let block_params = block.take_parameters();
+
+        let jump_args = match function.dfg[predecessor].unwrap_terminator_mut() {
+            TerminatorInstruction::Jmp { arguments, .. } => std::mem::take(arguments),
+            TerminatorInstruction::JmpIf { .. } => unreachable!("If jmpif instructions are modified to support block arguments in the future, this match will need to be updated"),
+            _ => unreachable!(
+                "Predecessor was already validated to have only a single jmp destination"
+            ),
+        };
+
+        assert_eq!(block_params.len(), jump_args.len());
+        for (param, arg) in block_params.iter().zip(jump_args) {
+            function.dfg.set_value_from_id(*param, arg);
+        }
+    }
+}
+
+/// Try to inline a block into its predecessor, returning true if successful.
+///
+/// This will only occur if the predecessor's only successor is the given block.
+/// It is also expected that the given block's only predecessor is the given one.
+fn try_inline_into_predecessor(
+    function: &mut Function,
+    cfg: &mut ControlFlowGraph,
+    block_id: BasicBlockId,
+    predecessor_id: BasicBlockId,
+) -> bool {
+    let mut successors = cfg.successors(predecessor_id);
+    if successors.len() == 1 && successors.next() == Some(block_id) {
+        drop(successors);
+
+        // First remove all the instructions and terminator from the block we're removing
+        let block = &mut function.dfg[block_id];
+        let mut instructions = std::mem::take(block.instructions_mut());
+        let terminator = block.take_terminator();
+
+        // Then append each to the predecessor
+        let predecessor = &mut function.dfg[predecessor_id];
+        predecessor.instructions_mut().append(&mut instructions);
+
+        predecessor.set_terminator(terminator);
+        cfg.recompute_block(function, block_id);
+        cfg.recompute_block(function, predecessor_id);
+        true
+    } else {
+        false
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use crate::ssa_refactor::{
+        ir::{instruction::TerminatorInstruction, map::Id, types::Type},
+        ssa_builder::FunctionBuilder,
+    };
+
+    #[test]
+    fn inline_blocks() {
+        // fn main {
+        //   b0():
+        //     jmp b1(Field 7)
+        //   b1(v0: Field):
+        //     jmp b2(v0)
+        //   b2(v1: Field):
+        //     return v1
+        // }
+        let main_id = Id::test_new(0);
+        let mut builder = FunctionBuilder::new("main".into(), main_id);
+
+        let b1 = builder.insert_block();
+        let b2 = builder.insert_block();
+
+        let v0 = builder.add_block_parameter(b1, Type::field());
+        let v1 = builder.add_block_parameter(b2, Type::field());
+
+        let expected_return = 7u128;
+        let seven = builder.field_constant(expected_return);
+        builder.terminate_with_jmp(b1, vec![seven]);
+
+        builder.switch_to_block(b1);
+        builder.terminate_with_jmp(b2, vec![v0]);
+
+        builder.switch_to_block(b2);
+        builder.terminate_with_return(vec![v1]);
+
+        let ssa = builder.finish();
+        assert_eq!(ssa.main().reachable_blocks().len(), 3);
+
+        // Expected output:
+        // fn main {
+        //   b0():
+        //     return Field 7
+        // }
+        let ssa = ssa.simplify_cfg();
+        let main = ssa.main();
+        println!("{}", main);
+        assert_eq!(main.reachable_blocks().len(), 1);
+
+        match main.dfg[main.entry_block()].terminator() {
+            Some(TerminatorInstruction::Return { return_values }) => {
+                assert_eq!(return_values.len(), 1);
+                let return_value = main
+                    .dfg
+                    .get_numeric_constant(return_values[0])
+                    .expect("Expected return value to be constant")
+                    .to_u128();
+                assert_eq!(return_value, expected_return);
+            }
+            other => panic!("Unexpected terminator {other:?}"),
+        }
+    }
+}