diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py
index a3f36c5226719..41de655a7cb32 100644
--- a/dev/breeze/src/airflow_breeze/global_constants.py
+++ b/dev/breeze/src/airflow_breeze/global_constants.py
@@ -20,17 +20,18 @@
 
 from __future__ import annotations
 
-import itertools
 import json
 import platform
 import subprocess
+from collections.abc import Generator
 from enum import Enum
+from pathlib import Path
 
 from airflow_breeze.utils.functools_cache import clearable_cache
 from airflow_breeze.utils.host_info_utils import Architecture
 from airflow_breeze.utils.path_utils import (
     AIRFLOW_CORE_SOURCES_PATH,
-    AIRFLOW_PROVIDERS_ROOT_PATH,
+    AIRFLOW_PYPROJECT_TOML_FILE_PATH,
     AIRFLOW_ROOT_PATH,
 )
 
@@ -554,9 +555,6 @@ def get_airflow_extras():
 
 
 # Initialize integrations
-ALL_PYPROJECT_TOML_FILES = AIRFLOW_ROOT_PATH.rglob("pyproject.toml")
-ALL_PROVIDER_YAML_FILES = AIRFLOW_PROVIDERS_ROOT_PATH.rglob("provider.yaml")
-ALL_PROVIDER_PYPROJECT_TOML_FILES = AIRFLOW_PROVIDERS_ROOT_PATH.rglob("provider.yaml")
 PROVIDER_RUNTIME_DATA_SCHEMA_PATH = AIRFLOW_CORE_SOURCES_PATH / "airflow" / "provider_info.schema.json"
 AIRFLOW_GENERATED_PROVIDER_DEPENDENCIES_PATH = AIRFLOW_ROOT_PATH / "generated" / "provider_dependencies.json"
 AIRFLOW_GENERATED_PROVIDER_DEPENDENCIES_HASH_PATH = (
@@ -567,12 +565,44 @@ def get_airflow_extras():
     AIRFLOW_ROOT_PATH / "scripts" / "ci" / "pre_commit" / "update_providers_dependencies.py"
 )
 
+ALL_PYPROJECT_TOML_FILES: list[Path] = []
+
+
+def get_all_provider_pyproject_toml_provider_yaml_files() -> Generator[Path, None, None]:
+    """
+    Yield ``pyproject.toml`` and ``provider.yaml`` paths of providers listed in the uv workspace.
+
+    Entries are read from the ``[tool.uv.workspace]`` section of the root ``pyproject.toml`` rather
+    than found by rglob. As a side effect the ``pyproject.toml`` path of every quoted entry
+    (not only providers) is recorded once in ``ALL_PYPROJECT_TOML_FILES``.
+    """
+    pyproject_toml_content = AIRFLOW_PYPROJECT_TOML_FILE_PATH.read_text().splitlines()
+    in_workspace = False
+    for line in pyproject_toml_content:
+        trimmed_line = line.strip()
+        if not in_workspace and trimmed_line.startswith("[tool.uv.workspace]"):
+            in_workspace = True
+        elif in_workspace:
+            if trimmed_line.startswith("#"):
+                continue
+            if trimmed_line.startswith('"'):
+                path = trimmed_line.split('"')[1]
+                pyproject_toml_file = AIRFLOW_ROOT_PATH / path / "pyproject.toml"
+                # The generator can be consumed many times - record each path only once
+                if pyproject_toml_file not in ALL_PYPROJECT_TOML_FILES:
+                    ALL_PYPROJECT_TOML_FILES.append(pyproject_toml_file)
+                if trimmed_line.startswith('"providers/'):
+                    yield pyproject_toml_file
+                    yield AIRFLOW_ROOT_PATH / path / "provider.yaml"
+            elif trimmed_line.startswith("]"):
+                break
+
 
 def _calculate_provider_deps_hash():
     import hashlib
 
     hasher = hashlib.sha256()
-    for file in sorted(itertools.chain(ALL_PROVIDER_PYPROJECT_TOML_FILES, ALL_PROVIDER_YAML_FILES)):
+    for file in sorted(get_all_provider_pyproject_toml_provider_yaml_files()):
         hasher.update(file.read_bytes())
     return hasher.hexdigest()
 
diff --git a/dev/breeze/src/airflow_breeze/utils/path_utils.py b/dev/breeze/src/airflow_breeze/utils/path_utils.py
index 72440e569944f..47a9cbd95e133 100644
--- a/dev/breeze/src/airflow_breeze/utils/path_utils.py
+++ b/dev/breeze/src/airflow_breeze/utils/path_utils.py
@@ -279,6 +279,7 @@ def find_airflow_root_path_to_operate_on() -> Path:
 
 
 AIRFLOW_ROOT_PATH = find_airflow_root_path_to_operate_on().resolve()
+AIRFLOW_PYPROJECT_TOML_FILE_PATH = AIRFLOW_ROOT_PATH / "pyproject.toml"
 AIRFLOW_CORE_ROOT_PATH = AIRFLOW_ROOT_PATH / "airflow-core"
 AIRFLOW_CORE_SOURCES_PATH = AIRFLOW_CORE_ROOT_PATH / "src"
 AIRFLOW_WWW_DIR = AIRFLOW_CORE_SOURCES_PATH / "airflow" / "www"
diff --git a/devel-common/src/tests_common/pytest_plugin.py b/devel-common/src/tests_common/pytest_plugin.py
index bd429defc7eea..d445243cc4f8d 100644
--- a/devel-common/src/tests_common/pytest_plugin.py
+++ b/devel-common/src/tests_common/pytest_plugin.py
@@ -146,6 +146,7 @@
 
 _airflow_sources = os.getenv("AIRFLOW_SOURCES", None)
 AIRFLOW_ROOT_PATH = (Path(_airflow_sources) if _airflow_sources else Path(__file__).parents[3]).resolve()
+AIRFLOW_PYPROJECT_TOML_FILE_PATH = AIRFLOW_ROOT_PATH / "pyproject.toml"
 AIRFLOW_CORE_SOURCES_PATH = AIRFLOW_ROOT_PATH / "airflow-core" / "src"
 AIRFLOW_CORE_TESTS_PATH = AIRFLOW_ROOT_PATH / "airflow-core" / "tests"
 AIRFLOW_PROVIDERS_ROOT_PATH = AIRFLOW_ROOT_PATH / "providers"
@@ -156,17 +157,47 @@ UPDATE_PROVIDER_DEPENDENCIES_SCRIPT = (
     AIRFLOW_ROOT_PATH / "scripts" / "ci" / "pre_commit" / "update_providers_dependencies.py"
 )
 
-ALL_PROVIDER_YAML_FILES = AIRFLOW_PROVIDERS_ROOT_PATH.rglob("provider.yaml")
-ALL_PROVIDER_PYPROJECT_TOML_FILES = AIRFLOW_PROVIDERS_ROOT_PATH.rglob("provider.yaml")
-
 # Deliberately copied from breeze - we want to keep it in sync but we do not want to import code from
 # Breeze here as we want to do it quickly
 
+ALL_PYPROJECT_TOML_FILES: list[Path] = []
+
+
+def get_all_provider_pyproject_toml_provider_yaml_files() -> Generator[Path, None, None]:
+    """
+    Yield ``pyproject.toml`` and ``provider.yaml`` paths of providers listed in the uv workspace.
+
+    Entries are read from the ``[tool.uv.workspace]`` section of the root ``pyproject.toml`` rather
+    than found by rglob. As a side effect the ``pyproject.toml`` path of every quoted entry
+    (not only providers) is recorded once in ``ALL_PYPROJECT_TOML_FILES``.
+    """
+    pyproject_toml_content = AIRFLOW_PYPROJECT_TOML_FILE_PATH.read_text().splitlines()
+    in_workspace = False
+    for line in pyproject_toml_content:
+        trimmed_line = line.strip()
+        if not in_workspace and trimmed_line.startswith("[tool.uv.workspace]"):
+            in_workspace = True
+        elif in_workspace:
+            if trimmed_line.startswith("#"):
+                continue
+            if trimmed_line.startswith('"'):
+                path = trimmed_line.split('"')[1]
+                pyproject_toml_file = AIRFLOW_ROOT_PATH / path / "pyproject.toml"
+                # The generator can be consumed many times - record each path only once
+                if pyproject_toml_file not in ALL_PYPROJECT_TOML_FILES:
+                    ALL_PYPROJECT_TOML_FILES.append(pyproject_toml_file)
+                if trimmed_line.startswith('"providers/'):
+                    yield pyproject_toml_file
+                    yield AIRFLOW_ROOT_PATH / path / "provider.yaml"
+            elif trimmed_line.startswith("]"):
+                break
+
+
 def _calculate_provider_deps_hash():
     import hashlib
 
     hasher = hashlib.sha256()
-    for file in sorted(itertools.chain(ALL_PROVIDER_PYPROJECT_TOML_FILES, ALL_PROVIDER_YAML_FILES)):
+    for file in sorted(get_all_provider_pyproject_toml_provider_yaml_files()):
         hasher.update(file.read_bytes())
     return hasher.hexdigest()
 
@@ -415,6 +446,15 @@ def initialize_airflow_tests(request):
         sys.exit(1)
 
 
+# For performance reasons we do not rglob for deprecation ignore files - on macOS in Docker
+# rglob over the whole source tree is very slow - so the paths are hardcoded here and the list
+# has to be updated manually whenever a new deprecations_ignore.yml file is added.
+DEPRECATIONS_IGNORE_FILES = [
+    AIRFLOW_CORE_TESTS_PATH / "deprecations_ignore.yml",
+    AIRFLOW_ROOT_PATH / "providers" / "google" / "tests" / "deprecations_ignore.yml",
+]
+
+
 def _find_all_deprecation_ignore_files() -> list[str]:
-    all_deprecation_ignore_files = AIRFLOW_ROOT_PATH.rglob("deprecations_ignore.yml")
+    all_deprecation_ignore_files = DEPRECATIONS_IGNORE_FILES.copy()
     return list(path.as_posix() for path in all_deprecation_ignore_files)