From c7c69b1419a4a37f52cff77d21b9a9f5decf8f03 Mon Sep 17 00:00:00 2001 From: Ken Odegard Date: Thu, 18 Apr 2024 17:53:58 +0200 Subject: [PATCH] Add cached `_split_line_selector` to avoid redundant parsing in `select_lines` (#5237) Co-authored-by: Marcel Bargull --- conda_build/metadata.py | 87 +++++++++++++++++++++++----------- news/5237-select_lines-caching | 19 ++++++++ tests/test_metadata.py | 22 +++++---- 3 files changed, 91 insertions(+), 37 deletions(-) create mode 100644 news/5237-select_lines-caching diff --git a/conda_build/metadata.py b/conda_build/metadata.py index 4347dc8842..dd02f9a65c 100644 --- a/conda_build/metadata.py +++ b/conda_build/metadata.py @@ -45,7 +45,7 @@ ) if TYPE_CHECKING: - from typing import Literal + from typing import Any, Literal try: import yaml @@ -277,38 +277,68 @@ def eval_selector(selector_string, namespace, variants_in_place): return eval_selector(next_string, namespace, variants_in_place) -def select_lines(data, namespace, variants_in_place): - lines = [] - - for i, line in enumerate(data.splitlines()): +@lru_cache(maxsize=None) +def _split_line_selector(text: str) -> tuple[tuple[str | None, str], ...]: + lines: list[tuple[str | None, str]] = [] + for line in text.splitlines(): line = line.rstrip() + # skip comment lines, include a blank line as a placeholder + if line.lstrip().startswith("#"): + lines.append((None, "")) + continue + + # include blank lines + if not line: + lines.append((None, "")) + continue + + # user may have quoted entire line to make YAML happy trailing_quote = "" if line and line[-1] in ("'", '"'): trailing_quote = line[-1] - if line.lstrip().startswith("#"): - # Don't bother with comment only lines - continue - m = sel_pat.match(line) - if m: - cond = m.group(3) - try: - if eval_selector(cond, namespace, variants_in_place): - lines.append(m.group(1) + trailing_quote) - except Exception as e: - sys.exit( - """\ -Error: Invalid selector in meta.yaml line %d: -offending line: -%s -exception: -%s -""" - % (i + 1, line, str(e)) - ) + # Checking for "[" and "]" before regex matching every line is a bit faster. + if ( + ("[" in line and "]" in line) + and (match := sel_pat.match(line)) + and (selector := match.group(3)) + ): + # found a selector + lines.append((selector, (match.group(1) + trailing_quote).rstrip())) else: + # no selector found + lines.append((None, line)) + return tuple(lines) + + +def select_lines(text: str, namespace: dict[str, Any], variants_in_place: bool) -> str: + lines = [] + selector_cache: dict[str, bool] = {} + for i, (selector, line) in enumerate(_split_line_selector(text)): + if not selector: + # no selector? include line as is lines.append(line) + else: + # include lines with a selector that evaluates to True + try: + if selector_cache[selector]: + lines.append(line) + except KeyError: + # KeyError: cache miss + try: + value = bool(eval_selector(selector, namespace, variants_in_place)) + selector_cache[selector] = value + if value: + lines.append(line) + except Exception as e: + sys.exit( + f"Error: Invalid selector in meta.yaml line {i + 1}:\n" + f"offending line:\n" + f"{line}\n" + f"exception:\n" + f"{e.__class__.__name__}: {e}\n" + ) return "\n".join(lines) + "\n" @@ -2083,8 +2113,11 @@ def uses_vcs_in_build(self) -> Literal["git", "svn", "mercurial"] | None: return None def get_recipe_text( - self, extract_pattern=None, force_top_level=False, apply_selectors=True - ): + self, + extract_pattern: str | None = None, + force_top_level: bool = False, + apply_selectors: bool = True, + ) -> str: meta_path = self.meta_path if meta_path: recipe_text = read_meta_file(meta_path) diff --git a/news/5237-select_lines-caching b/news/5237-select_lines-caching new file mode 100644 index 0000000000..434a832350 --- /dev/null +++ b/news/5237-select_lines-caching @@ -0,0 +1,19 @@ +### Enhancements + +* Add `conda_build.metadata._split_line_selector` to cache line-selector parsed text. (#5237) + +### Bug fixes + +* + +### Deprecations + +* + +### Docs + +* + +### Other + +* diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 0f6da9b089..1b9fc34258 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -57,14 +57,14 @@ def test_uses_vcs_in_metadata(testing_workdir, testing_metadata): def test_select_lines(): lines = "\n".join( ( - "", + "", # preserve leading newline "test", "test [abc] no", "test [abc] # no", " ' test ' ", ' " test " ', - "", - "# comment line", + "", # preserve newline + "# comment line", # preserve comment line (but not the comment) "test [abc]", " 'quoted # [abc] '", ' "quoted # [abc] yes "', @@ -74,19 +74,20 @@ def test_select_lines(): "test {{ JINJA_VAR[:2] }} # stuff yes [abc]", "test {{ JINJA_VAR[:2] }} # [abc] stuff yes", '{{ environ["test"] }} # [abc]', - "", # trailing newline + "", # preserve trailing newline ) ) assert select_lines(lines, {"abc": True}, variants_in_place=True) == "\n".join( ( - "", + "", # preserve leading newline "test", "test [abc] no", "test [abc] # no", " ' test '", ' " test "', - "", + "", # preserve newline + "", # preserve comment line (but not the comment) "test", " 'quoted'", ' "quoted"', @@ -96,20 +97,21 @@ def test_select_lines(): "test {{ JINJA_VAR[:2] }}", "test {{ JINJA_VAR[:2] }}", '{{ environ["test"] }}', - "", # trailing newline + "", # preserve trailing newline ) ) assert select_lines(lines, {"abc": False}, variants_in_place=True) == "\n".join( ( - "", + "", # preserve leading newline "test", "test [abc] no", "test [abc] # no", " ' test '", ' " test "', - "", + "", # preserve newline + "", # preserve comment line (but not the comment) "test {{ JINJA_VAR[:2] }}", - "", # trailing newline + "", # preserve trailing newline ) )