From 9326440ea2ab81464c689cbfe469875d24350232 Mon Sep 17 00:00:00 2001
From: Jarek Potiuk
Date: Tue, 11 Nov 2025 21:29:42 +0100
Subject: [PATCH] [v3-1-test] Make sure regenerating provider dependencies happens only once (#58198)

This has to be done in global_constants module, because regeneration
can happen just during importing and if we try to do it in a separate
package circular dependencies might happen.
(cherry picked from commit 4bde26ffb54928821176b8412a823b46ae2bcc16)

Co-authored-by: Jarek Potiuk
---
 .../src/airflow_breeze/global_constants.py    | 35 ++++++++++++++++---
 .../airflow_breeze/utils/md5_build_check.py   |  6 ++--
 2 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py
index e4486a6bbfc0f..3b2c36f246c46 100644
--- a/dev/breeze/src/airflow_breeze/global_constants.py
+++ b/dev/breeze/src/airflow_breeze/global_constants.py
@@ -26,6 +26,7 @@
 from collections.abc import Generator
 from enum import Enum
 from pathlib import Path
+from threading import Lock
 
 from airflow_breeze.utils.functools_cache import clearable_cache
 from airflow_breeze.utils.host_info_utils import Architecture
@@ -649,10 +650,6 @@ def get_airflow_extras():
     AIRFLOW_ROOT_PATH / "generated" / "provider_dependencies.json.sha256sum"
 )
 
-UPDATE_PROVIDER_DEPENDENCIES_SCRIPT = (
-    AIRFLOW_ROOT_PATH / "scripts" / "ci" / "prek" / "update_providers_dependencies.py"
-)
-
 ALL_PYPROJECT_TOML_FILES = []
 
 
@@ -676,6 +673,33 @@ def get_all_provider_pyproject_toml_provider_yaml_files() -> Generator[Path, Non
                 break
 
 
+_regenerate_provider_deps_lock = Lock()
+_has_regeneration_of_providers_run = False
+
+UPDATE_PROVIDER_DEPENDENCIES_SCRIPT = (
+    AIRFLOW_ROOT_PATH / "scripts" / "ci" / "prek" / "update_providers_dependencies.py"
+)
+
+
+def regenerate_provider_dependencies_once() -> None:
+    """Run provider dependencies regeneration once per interpreter execution.
+
+    This function is safe to call multiple times from different modules; the
+    underlying command will only run once. If the underlying command fails the
+    CalledProcessError is propagated to the caller.
+    """
+    global _has_regeneration_of_providers_run
+    with _regenerate_provider_deps_lock:
+        if _has_regeneration_of_providers_run:
+            return
+        # Run the regeneration command from the repository root to ensure correct
+        # relative paths if the script expects to be run from AIRFLOW_ROOT_PATH.
+        subprocess.check_call(
+            ["uv", "run", UPDATE_PROVIDER_DEPENDENCIES_SCRIPT.as_posix()], cwd=AIRFLOW_ROOT_PATH
+        )
+        _has_regeneration_of_providers_run = True
+
+
 def _calculate_provider_deps_hash():
     import hashlib
 
@@ -691,7 +715,7 @@ def _run_provider_dependencies_generation(calculated_hash=None) -> dict:
     AIRFLOW_GENERATED_PROVIDER_DEPENDENCIES_HASH_PATH.write_text(calculated_hash)
     # We use regular print there as rich console might not be initialized yet here
     print("Regenerating provider dependencies file")
-    subprocess.check_call(["uv", "run", UPDATE_PROVIDER_DEPENDENCIES_SCRIPT.as_posix()])
+    regenerate_provider_dependencies_once()
     return json.loads(AIRFLOW_GENERATED_PROVIDER_DEPENDENCIES_PATH.read_text())
 
 
@@ -720,6 +744,7 @@ def generate_provider_dependencies_if_needed():
 
 DEVEL_DEPS_PATH = AIRFLOW_ROOT_PATH / "generated" / "devel_deps.txt"
 
+
 # Initialize files for rebuild check
 FILES_FOR_REBUILD_CHECK = [
     "Dockerfile.ci",
diff --git a/dev/breeze/src/airflow_breeze/utils/md5_build_check.py b/dev/breeze/src/airflow_breeze/utils/md5_build_check.py
index c2bb827544033..6c0c00c462176 100644
--- a/dev/breeze/src/airflow_breeze/utils/md5_build_check.py
+++ b/dev/breeze/src/airflow_breeze/utils/md5_build_check.py
@@ -22,14 +22,13 @@
 
 import hashlib
 import os
-import subprocess
 from pathlib import Path
 from typing import TYPE_CHECKING
 
 from airflow_breeze.global_constants import (
     ALL_PYPROJECT_TOML_FILES,
     FILES_FOR_REBUILD_CHECK,
-    UPDATE_PROVIDER_DEPENDENCIES_SCRIPT,
+    regenerate_provider_dependencies_once,
 )
 from airflow_breeze.utils.console import get_console
 from airflow_breeze.utils.path_utils import AIRFLOW_ROOT_PATH
@@ -113,7 +112,8 @@ def calculate_md5_checksum_for_files(
             get_console().print(
                 [os.fspath(file.relative_to(AIRFLOW_ROOT_PATH)) for file in modified_pyproject_toml_files]
             )
-            subprocess.check_call(["uv", "run", UPDATE_PROVIDER_DEPENDENCIES_SCRIPT.as_posix()])
+            # Delegate to the shared helper that ensures regeneration runs only once
+            regenerate_provider_dependencies_once()
     for file in FILES_FOR_REBUILD_CHECK:
         is_modified = check_md5_sum_for_file(file, md5sum_cache_dir, update)
         if is_modified: