From adbe91a84dc2d291dae36fb70bc227762e323d06 Mon Sep 17 00:00:00 2001 From: Benjy Weinberger Date: Tue, 12 Nov 2024 07:24:46 -0800 Subject: [PATCH] Pass derivation details through from Rust. (#21631) This allows us to specify which config file each value came from. We intern the details strings, as they are very repetitive. This requires tying the lifetime of various data structures to that of the GIL. --- .../pants/engine/internals/native_engine.pyi | 6 +- src/python/pants/help/help_info_extracter.py | 15 +- .../pants/help/help_info_extracter_test.py | 8 +- src/python/pants/option/native_options.py | 10 +- src/rust/engine/src/externs/options.rs | 205 +++++++++++------- 5 files changed, 144 insertions(+), 100 deletions(-) diff --git a/src/python/pants/engine/internals/native_engine.pyi b/src/python/pants/engine/internals/native_engine.pyi index a10e1bf7ac1..a3da381d685 100644 --- a/src/python/pants/engine/internals/native_engine.pyi +++ b/src/python/pants/engine/internals/native_engine.pyi @@ -662,10 +662,8 @@ class PyConfigSource: # See src/rust/engine/src/externs/options.rs for the Rust-side versions of these types. T = TypeVar("T") -# List of pairs of (value, ranks of the sources of the value). -# A scalar value will always have a single source. A list/dict value -# may come from multiple sources, if its elements were appended. -OptionValueDerivation = list[Tuple[T, list[int]]] +# List of tuples of (value, rank, details string). +OptionValueDerivation = list[Tuple[T, int, str]] # A tuple (value, rank of value, optional derivation of value). OptionValue = Tuple[Optional[T], int, Optional[OptionValueDerivation]] diff --git a/src/python/pants/help/help_info_extracter.py b/src/python/pants/help/help_info_extracter.py index dc087531d5e..5ed854d32aa 100644 --- a/src/python/pants/help/help_info_extracter.py +++ b/src/python/pants/help/help_info_extracter.py @@ -1031,19 +1031,8 @@ def get_option_scope_help_info( empty_details = "" if (is_list or is_dict) else None ranked_values.append(RankedValue(Rank.NONE, empty_val, empty_details)) - for value, ranks in derivation: - if len(ranks) == 0: - rank = Rank.NONE - details = None - else: - rank = ranks[-1] - # Again, distinguishing between '' vs None in the details field - # does not matter, but we want to be consistent with the idiosyncratic - # behavior of the legacy parser, until we get rid of it. - details = ( - ", ".join(filter(None, [r.description() for r in ranks])) or empty_details - ) - ranked_values.append(RankedValue(rank, value, details)) + for value, rank, details in derivation: + ranked_values.append(RankedValue(rank, value, details or empty_details)) history = OptionValueHistory(tuple(ranked_values)) ohi = self.get_option_help_info(args, kwargs) ohi = dataclasses.replace(ohi, value_history=history) diff --git a/src/python/pants/help/help_info_extracter_test.py b/src/python/pants/help/help_info_extracter_test.py index a88e02d0ce1..dc9e110d264 100644 --- a/src/python/pants/help/help_info_extracter_test.py +++ b/src/python/pants/help/help_info_extracter_test.py @@ -324,7 +324,7 @@ def fake_consumed_scopes_mapper(scope: str) -> Tuple[str, ...]: { "rank": Rank.ENVIRONMENT, "value": [42, 99, 88], - "details": "from config, from an env var", + "details": "pants.test.toml, env var", }, ), }, @@ -388,7 +388,7 @@ def fake_consumed_scopes_mapper(scope: str) -> Tuple[str, ...]: {"details": "", "rank": Rank.NONE, "value": []}, {"details": "", "rank": Rank.HARDCODED, "value": []}, { - "details": "from command-line flag", + "details": "command-line flag", "rank": Rank.FLAG, "value": ["internal_plugins.releases"], }, @@ -647,7 +647,7 @@ def fake_consumed_scopes_mapper(scope: str) -> Tuple[str, ...]: { "rank": Rank.ENVIRONMENT, "value": [42, 99, 88], - "details": "from config, from an env var", + "details": "pants.test.toml, env var", }, ), }, @@ -711,7 +711,7 @@ def fake_consumed_scopes_mapper(scope: str) -> Tuple[str, ...]: {"details": "", "rank": Rank.NONE, "value": []}, {"details": "", "rank": Rank.HARDCODED, "value": []}, { - "details": "from command-line flag", + "details": "command-line flag", "rank": Rank.FLAG, "value": ["internal_plugins.releases"], }, diff --git a/src/python/pants/option/native_options.py b/src/python/pants/option/native_options.py index b622e84b396..43af47d7fbf 100644 --- a/src/python/pants/option/native_options.py +++ b/src/python/pants/option/native_options.py @@ -116,7 +116,7 @@ def get_value(self, *, scope, registration_args, registration_kwargs) -> Tuple[A def get_derivation( self, *, scope, registration_args, registration_kwargs - ) -> list[Tuple[Any, list[Rank]]]: + ) -> list[Tuple[Any, Rank, Optional[str]]]: _, _, derivation = self._get_value_and_derivation( scope, registration_args, registration_kwargs ) @@ -124,7 +124,7 @@ def get_derivation( def _get_value_and_derivation( self, scope, registration_args, registration_kwargs - ) -> Tuple[Any, Rank, list[Tuple[Any, list[Rank]]]]: + ) -> Tuple[Any, Rank, list[Tuple[Any, Rank, Optional[str]]]]: return self._get( scope=scope, dest=parse_dest(*registration_args, **registration_kwargs), @@ -147,7 +147,7 @@ def _get( member_type=None, choices=None, passthrough=False, - ) -> Tuple[Any, Rank, list[Tuple[Any, list[Rank]]]]: + ) -> Tuple[Any, Rank, list[Tuple[Any, Rank, Optional[str]]]]: def scope_str() -> str: return "global scope" if scope == GLOBAL_SCOPE else f"scope '{scope}'" @@ -235,9 +235,7 @@ def process_value(v): return v if derivation: - derivation = [ - (process_value(v), [self.int_to_rank[r] for r in rs]) for (v, rs) in derivation - ] + derivation = [(process_value(v), self.int_to_rank[r], d) for (v, r, d) in derivation] if val is not None: val = process_value(val) diff --git a/src/rust/engine/src/externs/options.rs b/src/rust/engine/src/externs/options.rs index 82c04b2c569..d0785ea77b5 100644 --- a/src/rust/engine/src/externs/options.rs +++ b/src/rust/engine/src/externs/options.rs @@ -10,6 +10,7 @@ use options::{ Source, Val, }; +use itertools::Itertools; use std::collections::HashMap; pyo3::import_exception!(pants.option.errors, ParseError); @@ -164,11 +165,8 @@ struct PyOptionParser(OptionParser); // The pythonic value of a dict-typed option. type PyDictVal = HashMap; -// The derivation of the option value, as a vec of (value, vec of source ranks) tuples. -// A scalar value will always have a single source. A list/dict value may have elements -// appended across multiple sources. -// In ascending rank order, so the last value is the final value of the option. -type OptionValueDerivation = Vec<(T, Vec)>; +// The derivation of the option value, as a vec of (value, rank, details string) tuples. +type OptionValueDerivation<'py, T> = Vec<(T, isize, Option>)>; // A tuple (final value, rank of final value, optional derivation of value). // @@ -177,19 +175,50 @@ type OptionValueDerivation = Vec<(T, Vec)>; // We could get rid of this tuple type by representing the final value and its rank as // a singleton derivation (in the case where we don't otherwise need the full derivation). // But that would allocate two unnecessary Vecs for every option. -type OptionValue = (Option, isize, Option>); +type OptionValue<'py, T> = (Option, isize, Option>); + +fn source_to_details(source: &Source) -> Option<&str> { + match source { + Source::Default => None, + Source::Config { ordinal: _, path } => Some(path), + Source::Env => Some("env var"), + Source::Flag => Some("command-line flag"), + } +} + +fn to_details<'py>(py: Python<'py>, sources: Vec<&'py Source>) -> Option> { + if sources.is_empty() { + return None; + } + if sources.len() == 1 { + return source_to_details(sources.first().unwrap()).map(|s| PyString::intern_bound(py, s)); + } + #[allow(unstable_name_collisions)] + // intersperse is provided by itertools::Itertools, but is also in the Rust nightly + // as an experimental feature of standard Iterator. If/when that becomes standard we + // can use it, but for now we must squelch the name collision. + Some(PyString::intern_bound( + py, + &sources + .into_iter() + .filter_map(source_to_details) + .intersperse(", ") + .collect::(), + )) +} // Condense list value derivation across sources, so that it reflects merges vs. replacements // in a useful way. E.g., if we merge [a, b] and [c], and then replace it with [d, e], // the derivation will show: // - [d, e] (from command-line flag) // - [a, b, c] (from env var, from config) -fn condense_list_value_derivation( - derivation: Vec<(&Source, Vec>)>, -) -> OptionValueDerivation> { - let mut ret: OptionValueDerivation> = vec![]; - let mut cur_group = vec![]; - let mut cur_ranks = vec![]; +fn condense_list_value_derivation<'py, T: PartialEq>( + py: Python<'py>, + derivation: Vec<(&'py Source, Vec>)>, +) -> OptionValueDerivation<'py, Vec> { + let mut ret: OptionValueDerivation<'py, Vec> = vec![]; + let mut cur_group: Vec> = vec![]; + let mut cur_sources: Vec<&Source> = vec![]; // In this case, for simplicity, we always use the "inefficient" O(M*N) remover, // even for hashable values. This is very unlikely to have a noticeable performance impact @@ -203,23 +232,25 @@ fn condense_list_value_derivation( for (source, list_edits) in derivation.into_iter() { for list_edit in list_edits { if list_edit.action == ListEditAction::Replace { - if !cur_group.is_empty() { + if !cur_sources.is_empty() { ret.push(( apply_list_edits::(remover, cur_group.into_iter()), - cur_ranks, + cur_sources.last().unwrap().rank() as isize, + to_details(py, cur_sources), )); } cur_group = vec![]; - cur_ranks = vec![]; + cur_sources = vec![]; } cur_group.push(list_edit); - cur_ranks.push(source.rank() as isize); + cur_sources.push(source); } } - if !cur_group.is_empty() { + if !cur_sources.is_empty() { ret.push(( apply_list_edits::(remover, cur_group.into_iter()), - cur_ranks, + cur_sources.last().unwrap().rank() as isize, + to_details(py, cur_sources), )); } @@ -231,13 +262,13 @@ fn condense_list_value_derivation( // the derivation will show: // - {d: 4} (from command-line flag) // - {a: 1, b: 2, c: 3} (from env var, from config) -fn condense_dict_value_derivation( - py: Python, - derivation: Vec<(&Source, Vec)>, -) -> PyResult> { - let mut ret: OptionValueDerivation = vec![]; - let mut cur_group = vec![]; - let mut cur_ranks = vec![]; +fn condense_dict_value_derivation<'py>( + py: Python<'py>, + derivation: Vec<(&'py Source, Vec)>, +) -> PyResult> { + let mut ret: OptionValueDerivation<'py, PyDictVal> = vec![]; + let mut cur_group: Vec = vec![]; + let mut cur_sources: Vec<&Source> = vec![]; for (source, dict_edits) in derivation.into_iter() { for dict_edit in dict_edits { @@ -245,34 +276,39 @@ fn condense_dict_value_derivation( if !cur_group.is_empty() { ret.push(( dict_into_py(py, apply_dict_edits(cur_group.into_iter()))?, - cur_ranks, + cur_sources.last().unwrap().rank() as isize, + to_details(py, cur_sources), )); } cur_group = vec![]; - cur_ranks = vec![]; + cur_sources = vec![]; } cur_group.push(dict_edit); - cur_ranks.push(source.rank() as isize); + cur_sources.push(source); } } if !cur_group.is_empty() { ret.push(( dict_into_py(py, apply_dict_edits(cur_group.into_iter()))?, - cur_ranks, + cur_sources.last().unwrap().rank() as isize, + to_details(py, cur_sources), )); } Ok(ret) } -fn into_py(res: Result, String>) -> PyResult> { +fn into_py<'py, T>( + py: Python<'py>, + res: Result, String>, +) -> PyResult> { let val = res.map_err(ParseError::new_err)?; Ok(( val.value, val.source.rank() as isize, val.derivation.map(|d| { d.into_iter() - .map(|(source, val)| (val, vec![source.rank() as isize])) + .map(|(source, val)| (val, source.rank() as isize, to_details(py, vec![source]))) .collect() }), )) @@ -280,16 +316,17 @@ fn into_py(res: Result, String>) -> PyResult( - &'a self, + fn get_list<'py, T: ToOwned + ?Sized>( + &'py self, + py: Python<'py>, option_id: &Bound<'_, PyOptionId>, default: Vec, getter: fn( - &'a OptionParser, + &'py OptionParser, &OptionId, Vec, - ) -> Result, String>, - ) -> PyResult>> + ) -> Result, String>, + ) -> PyResult>> where ::Owned: PartialEq, { @@ -298,7 +335,9 @@ impl PyOptionParser { Ok(( Some(opt_val.value), opt_val.source.rank() as isize, - opt_val.derivation.map(condense_list_value_derivation), + opt_val + .derivation + .map(|d| condense_list_value_derivation(py, d)), )) } } @@ -333,87 +372,107 @@ impl PyOptionParser { } #[pyo3(signature = (option_id, default))] - fn get_bool( - &self, + fn get_bool<'py>( + &'py self, + py: Python<'py>, option_id: &Bound<'_, PyOptionId>, default: Option, - ) -> PyResult> { - into_py(self.0.parse_bool_optional(&option_id.borrow().0, default)) + ) -> PyResult> { + into_py( + py, + self.0.parse_bool_optional(&option_id.borrow().0, default), + ) } #[pyo3(signature = (option_id, default))] - fn get_int( - &self, + fn get_int<'py>( + &'py self, + py: Python<'py>, option_id: &Bound<'_, PyOptionId>, default: Option, - ) -> PyResult> { - into_py(self.0.parse_int_optional(&option_id.borrow().0, default)) + ) -> PyResult> { + into_py( + py, + self.0.parse_int_optional(&option_id.borrow().0, default), + ) } #[pyo3(signature = (option_id, default))] - fn get_float( - &self, + fn get_float<'py>( + &'py self, + py: Python<'py>, option_id: &Bound<'_, PyOptionId>, default: Option, - ) -> PyResult> { - into_py(self.0.parse_float_optional(&option_id.borrow().0, default)) + ) -> PyResult> { + into_py( + py, + self.0.parse_float_optional(&option_id.borrow().0, default), + ) } #[pyo3(signature = (option_id, default))] - fn get_string( - &self, + fn get_string<'py>( + &'py self, + py: Python<'py>, option_id: &Bound<'_, PyOptionId>, default: Option<&str>, - ) -> PyResult> { - into_py(self.0.parse_string_optional(&option_id.borrow().0, default)) + ) -> PyResult> { + into_py( + py, + self.0.parse_string_optional(&option_id.borrow().0, default), + ) } - fn get_bool_list( - &self, + fn get_bool_list<'py>( + &'py self, + py: Python<'py>, option_id: &Bound<'_, PyOptionId>, default: Vec, - ) -> PyResult>> { - self.get_list::(option_id, default, |op, oid, def| { + ) -> PyResult>> { + self.get_list::(py, option_id, default, |op, oid, def| { op.parse_bool_list(oid, def) }) } - fn get_int_list( - &self, + fn get_int_list<'py>( + &'py self, + py: Python<'py>, option_id: &Bound<'_, PyOptionId>, default: Vec, - ) -> PyResult>> { - self.get_list::(option_id, default, |op, oid, def| { + ) -> PyResult>> { + self.get_list::(py, option_id, default, |op, oid, def| { op.parse_int_list(oid, def) }) } - fn get_float_list( - &self, + fn get_float_list<'py>( + &'py self, + py: Python<'py>, option_id: &Bound<'_, PyOptionId>, default: Vec, - ) -> PyResult>> { - self.get_list::(option_id, default, |op, oid, def| { + ) -> PyResult>> { + self.get_list::(py, option_id, default, |op, oid, def| { op.parse_float_list(oid, def) }) } - fn get_string_list( - &self, + fn get_string_list<'py>( + &'py self, + py: Python<'py>, option_id: &Bound<'_, PyOptionId>, default: Vec, - ) -> PyResult>> { - self.get_list::(option_id, default, |op, oid, def| { + ) -> PyResult>> { + self.get_list::(py, option_id, default, |op, oid, def| { op.parse_string_list(oid, def) }) } - fn get_dict( - &self, - py: Python, + fn get_dict<'py>( + &'py self, + py: Python<'py>, option_id: &Bound<'_, PyOptionId>, default: &Bound<'_, PyDict>, - ) -> PyResult> { + ) -> PyResult> { let default = default .items() .into_iter()