From 8f41beef025b7c28fe09bacdf5fe29d1142a5d74 Mon Sep 17 00:00:00 2001 From: Tom French <15848336+TomAFrench@users.noreply.github.com> Date: Thu, 14 Nov 2024 16:21:44 +0000 Subject: [PATCH] chore: pull out reference changes from sync PR (#9967) This PR pulls out Jake's changes to how we handle references. --- .../noirc_evaluator/src/ssa/ir/types.rs | 11 ++ .../noirc_evaluator/src/ssa/opt/mem2reg.rs | 126 +++++++++++++++--- .../src/ssa/opt/mem2reg/alias_set.rs | 7 + .../execution_success/references/src/main.nr | 18 ++- 4 files changed, 141 insertions(+), 21 deletions(-) diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/types.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/types.rs index 203356de8e6..130f1d59e46 100644 --- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/types.rs +++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/types.rs @@ -204,6 +204,17 @@ impl Type { Type::Slice(element_types) | Type::Array(element_types, _) => element_types[0].first(), } } + + /// True if this is a reference type or if it is a composite type which contains a reference. + pub(crate) fn contains_reference(&self) -> bool { + match self { + Type::Reference(_) => true, + Type::Numeric(_) | Type::Function => false, + Type::Array(elements, _) | Type::Slice(elements) => { + elements.iter().any(|elem| elem.contains_reference()) + } + } + } } /// Composite Types are essentially flattened struct or tuple types. diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg.rs index 95a95157114..a052abc5e16 100644 --- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg.rs +++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg.rs @@ -212,37 +212,32 @@ impl<'f> PerFunctionContext<'f> { all_terminator_values: &HashSet, per_func_block_params: &HashSet, ) -> bool { - let func_params = self.inserter.function.parameters(); - let reference_parameters = func_params - .iter() - .filter(|param| self.inserter.function.dfg.value_is_reference(**param)) - .collect::>(); + let reference_parameters = self.reference_parameters(); - let mut store_alias_used = false; if let Some(expression) = block.expressions.get(store_address) { if let Some(aliases) = block.aliases.get(expression) { let allocation_aliases_parameter = aliases.any(|alias| reference_parameters.contains(&alias)); if allocation_aliases_parameter == Some(true) { - store_alias_used = true; + return true; } let allocation_aliases_parameter = aliases.any(|alias| per_func_block_params.contains(&alias)); if allocation_aliases_parameter == Some(true) { - store_alias_used = true; + return true; } let allocation_aliases_parameter = aliases.any(|alias| self.calls_reference_input.contains(&alias)); if allocation_aliases_parameter == Some(true) { - store_alias_used = true; + return true; } let allocation_aliases_parameter = aliases.any(|alias| all_terminator_values.contains(&alias)); if allocation_aliases_parameter == Some(true) { - store_alias_used = true; + return true; } let allocation_aliases_parameter = aliases.any(|alias| { @@ -252,14 +247,25 @@ impl<'f> PerFunctionContext<'f> { false } }); - if allocation_aliases_parameter == Some(true) { - store_alias_used = true; + return true; } } } - store_alias_used + false + } + + /// Collect the input parameters of the function which are of reference type. + /// All references are mutable, so these inputs are shared with the function caller + /// and thus stores should not be eliminated, even if the blocks in this function + /// don't use them anywhere. + fn reference_parameters(&self) -> BTreeSet { + let parameters = self.inserter.function.parameters().iter(); + parameters + .filter(|param| self.inserter.function.dfg.value_is_reference(**param)) + .copied() + .collect() } fn recursively_add_values(&self, value: ValueId, set: &mut HashSet) { @@ -300,6 +306,12 @@ impl<'f> PerFunctionContext<'f> { fn analyze_block(&mut self, block: BasicBlockId, mut references: Block) { let instructions = self.inserter.function.dfg[block].take_instructions(); + // If this is the entry block, take all the block parameters and assume they may + // be aliased to each other + if block == self.inserter.function.entry_block() { + self.add_aliases_for_reference_parameters(block, &mut references); + } + for instruction in instructions { self.analyze_instruction(block, &mut references, instruction); } @@ -316,13 +328,51 @@ impl<'f> PerFunctionContext<'f> { self.blocks.insert(block, references); } + /// Go through each parameter and register that all reference parameters of the same type are + /// possibly aliased to each other. If there are parameters with nested references (arrays of + /// references or references containing other references) we give up and assume all parameter + /// references are `AliasSet::unknown()`. + fn add_aliases_for_reference_parameters(&self, block: BasicBlockId, references: &mut Block) { + let dfg = &self.inserter.function.dfg; + let params = dfg.block_parameters(block); + + let mut aliases: HashMap = HashMap::default(); + + for param in params { + match dfg.type_of_value(*param) { + // If the type indirectly contains a reference we have to assume all references + // are unknown since we don't have any ValueIds to use. + Type::Reference(element) if element.contains_reference() => return, + Type::Reference(element) => { + let empty_aliases = AliasSet::known_empty(); + let alias_set = + aliases.entry(element.as_ref().clone()).or_insert(empty_aliases); + alias_set.insert(*param); + } + typ if typ.contains_reference() => return, + _ => continue, + } + } + + for aliases in aliases.into_values() { + let first = aliases.first(); + let first = first.expect("All parameters alias at least themselves or we early return"); + + let expression = Expression::Other(first); + let previous = references.aliases.insert(expression.clone(), aliases.clone()); + assert!(previous.is_none()); + + aliases.for_each(|alias| { + let previous = references.expressions.insert(alias, expression.clone()); + assert!(previous.is_none()); + }); + } + } + /// Add all instructions in `last_stores` to `self.instructions_to_remove` which do not /// possibly alias any parameters of the given function. fn remove_stores_that_do_not_alias_parameters(&mut self, references: &Block) { - let parameters = self.inserter.function.parameters().iter(); - let reference_parameters = parameters - .filter(|param| self.inserter.function.dfg.value_is_reference(**param)) - .collect::>(); + let reference_parameters = self.reference_parameters(); for (allocation, instruction) in &references.last_stores { if let Some(expression) = references.expressions.get(allocation) { @@ -466,6 +516,8 @@ impl<'f> PerFunctionContext<'f> { } } + /// If `array` is an array constant that contains reference types, then insert each element + /// as a potential alias to the array itself. fn check_array_aliasing(&self, references: &mut Block, array: ValueId) { if let Some((elements, typ)) = self.inserter.function.dfg.get_array_constant(array) { if Self::contains_references(&typ) { @@ -961,4 +1013,44 @@ mod tests { assert_eq!(count_loads(b2, &main.dfg), 1); assert_eq!(count_loads(b3, &main.dfg), 3); } + + #[test] + fn parameter_alias() { + // Do not assume parameters are not aliased to each other. + // The load below shouldn't be removed since `v0` could + // be aliased to `v1`. + // + // fn main f0 { + // b0(v0: &mut Field, v1: &mut Field): + // store Field 0 at v0 + // store Field 1 at v1 + // v4 = load v0 + // constrain v4 == Field 1 + // return + // } + let main_id = Id::test_new(0); + let mut builder = FunctionBuilder::new("main".into(), main_id); + + let field_ref = Type::Reference(Arc::new(Type::field())); + let v0 = builder.add_parameter(field_ref.clone()); + let v1 = builder.add_parameter(field_ref.clone()); + + let zero = builder.field_constant(0u128); + let one = builder.field_constant(0u128); + builder.insert_store(v0, zero); + builder.insert_store(v1, one); + + let v4 = builder.insert_load(v0, Type::field()); + builder.insert_constrain(v4, one, None); + builder.terminate_with_return(Vec::new()); + + let ssa = builder.finish(); + let main = ssa.main(); + assert_eq!(count_loads(main.entry_block(), &main.dfg), 1); + + // No change expected + let ssa = ssa.mem2reg(); + let main = ssa.main(); + assert_eq!(count_loads(main.entry_block(), &main.dfg), 1); + } } diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg/alias_set.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg/alias_set.rs index 5477025e429..4d768caa36b 100644 --- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg/alias_set.rs +++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg/alias_set.rs @@ -73,4 +73,11 @@ impl AliasSet { } } } + + /// Return the first ValueId in the alias set as long as there is at least one. + /// The ordering is arbitrary (by lowest ValueId) so this method should only be + /// used when you need an arbitrary ValueId from the alias set. + pub(super) fn first(&self) -> Option { + self.aliases.as_ref().and_then(|aliases| aliases.first().copied()) + } } diff --git a/noir/noir-repo/test_programs/execution_success/references/src/main.nr b/noir/noir-repo/test_programs/execution_success/references/src/main.nr index 4ddbf8aa1e3..65d70dc4a5f 100644 --- a/noir/noir-repo/test_programs/execution_success/references/src/main.nr +++ b/noir/noir-repo/test_programs/execution_success/references/src/main.nr @@ -1,7 +1,6 @@ fn main(mut x: Field) { add1(&mut x); assert(x == 3); - let mut s = S { y: x }; s.add2(); assert(s.y == 5); @@ -21,18 +20,16 @@ fn main(mut x: Field) { let mut c = C { foo: 0, bar: &mut C2 { array: &mut [1, 2] } }; *c.bar.array = [3, 4]; assert(*c.bar.array == [3, 4]); - regression_1887(); regression_2054(); regression_2030(); regression_2255(); - + regression_6443(); assert(x == 3); regression_2218_if_inner_if(x, 10); regression_2218_if_inner_else(20, x); regression_2218_else(x, 3); regression_2218_loop(x, 10); - regression_2560(s_ref); } @@ -106,6 +103,7 @@ fn regression_2030() { let _ = *array[0]; *array[0] = 1; } + // The `mut x: &mut ...` caught a bug handling lvalues where a double-dereference would occur internally // in one step rather than being tracked by two separate steps. This lead to assigning the 1 value to the // incorrect outer `mut` reference rather than the correct `&mut` reference. @@ -119,6 +117,18 @@ fn regression_2255_helper(mut x: &mut Field) { *x = 1; } +// Similar to `regression_2255` but without the double-dereferencing. +// The test checks that `mem2reg` does not eliminate storing to a reference passed as a parameter. +fn regression_6443() { + let x = &mut 0; + regression_6443_helper(x); + assert(*x == 1); +} + +fn regression_6443_helper(x: &mut Field) { + *x = 1; +} + fn regression_2218(x: Field, y: Field) -> Field { let q = &mut &mut 0; let q1 = *q;