From 57d397d6c1c441aea3fab67d0ceed87806e11720 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Tue, 5 Nov 2024 12:01:30 +0100 Subject: [PATCH] Fix reproducibility of prepared provider packages (fix flit frontend) (#43683) After some checks it turned out that reproducibility of produced packages depends not only on the build backend configured for the project but also on the build front-end used - because the front-end is the one that modifies metadata in prepared packages - including the build tool used, its version and the metadata version supported by the front-end. That's why in order to maintain reproducibility for anyone who builds the packages, we have to pin not only the build backend in pyproject.toml (flit-core) but also the build front-end used (flit). Since package preparation is done with breeze, we can do it by pinning flit (and just in case also flit-core) so that anyone who builds a specific version of the package will use exactly the same flit as the person who built the original packages. This way we will avoid the reproducibility problems experienced with the 1.5.0 release of FAB. 
(cherry picked from commit 18ea01cef2b92fe820ceaa33be7b44f9f576aad4) --- dev/README_RELEASE_PROVIDER_PACKAGES.md | 1 - dev/breeze/README.md | 2 +- dev/breeze/doc/images/output_build-docs.svg | 28 +++++++++---------- dev/breeze/doc/images/output_build-docs.txt | 2 +- dev/breeze/doc/images/output_prod-image.svg | 2 +- dev/breeze/doc/images/output_prod-image.txt | 2 +- .../doc/images/output_prod-image_build.txt | 2 +- dev/breeze/doc/images/output_setup.svg | 2 +- dev/breeze/doc/images/output_setup.txt | 2 +- .../doc/images/output_setup_autocomplete.svg | 10 +++---- .../doc/images/output_setup_autocomplete.txt | 2 +- dev/breeze/doc/images/output_setup_config.txt | 2 +- .../doc/images/output_start-airflow.txt | 2 +- dev/breeze/pyproject.toml | 15 +++++++++- .../commands/release_candidate_command.py | 1 - .../commands/release_management_commands.py | 4 --- .../airflow_breeze/commands/setup_commands.py | 26 ++++++++++++++++- .../templates/pyproject_TEMPLATE.toml.jinja2 | 3 +- .../airflow_breeze/utils/python_versions.py | 8 +----- .../src/airflow_breeze/utils/reproducible.py | 3 -- dev/breeze/uv.lock | 4 ++- 21 files changed, 73 insertions(+), 50 deletions(-) diff --git a/dev/README_RELEASE_PROVIDER_PACKAGES.md b/dev/README_RELEASE_PROVIDER_PACKAGES.md index 749f89e106320..25aa8062c7722 100644 --- a/dev/README_RELEASE_PROVIDER_PACKAGES.md +++ b/dev/README_RELEASE_PROVIDER_PACKAGES.md @@ -335,7 +335,6 @@ export AIRFLOW_REPO_ROOT=$(pwd -P) rm -rf ${AIRFLOW_REPO_ROOT}/dist/* ``` - * Release candidate packages: ```shell script diff --git a/dev/breeze/README.md b/dev/breeze/README.md index 2c38aa7c1a95e..713bf7ce83fd3 100644 --- a/dev/breeze/README.md +++ b/dev/breeze/README.md @@ -66,6 +66,6 @@ PLEASE DO NOT MODIFY THE HASH BELOW! IT IS AUTOMATICALLY UPDATED BY PRE-COMMIT. 
--------------------------------------------------------------------------------------------------------- -Package config hash: f8e8729f4236f050d4412cbbc9d53fdd4e6ddad65ce5fafd3c5b6fcdacbea5431eea760b961534a63fd5733b072b38e8167b5b0c12ee48b31c3257306ef11940 +Package config hash: d1d07397099e14c5fc5f0b0e13a87ac8e112bf66755f77cee62b29151cd18c2f2d35932906db6b3885af652defddce696ef9b2df58e21bd3a7749bca82baf910 --------------------------------------------------------------------------------------------------------- diff --git a/dev/breeze/doc/images/output_build-docs.svg b/dev/breeze/doc/images/output_build-docs.svg index e270c6b92e997..6fa9017144472 100644 --- a/dev/breeze/doc/images/output_build-docs.svg +++ b/dev/breeze/doc/images/output_build-docs.svg @@ -203,32 +203,32 @@ Build documents. ╭─ Doc flags ──────────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---docs-only-dOnly build documentation. ---spellcheck-only-sOnly run spell checking. ---clean-buildClean inventories of Inter-Sphinx documentation and generated APIs and sphinx     +--docs-only-dOnly build documentation. +--spellcheck-only-sOnly run spell checking. +--clean-buildClean inventories of Inter-Sphinx documentation and generated APIs and sphinx     artifacts before the build - useful for a clean build.                            ---one-pass-onlyBuilds documentation in one pass only. This is useful for debugging sphinx        +--one-pass-onlyBuilds documentation in one pass only. This is useful for debugging sphinx        errors.                                                                           ---package-filterFilter(s) to use more than one can be specified. You can use glob pattern         +--package-filterFilter(s) to use more than one can be specified. You can use glob pattern         matching the full package name, for example `apache-airflow-providers-*`. Useful  when you want to selectseveral similarly named packages together.              
   (TEXT)                                                                            ---include-not-ready-providersWhether to include providers that are not yet ready to be released. ---include-removed-providersWhether to include providers that are removed. ---github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] ---builderBuildx builder used to perform `docker buildx build` commands.(TEXT) +--include-not-ready-providersWhether to include providers that are not yet ready to be released. +--include-removed-providersWhether to include providers that are removed. +--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] +--builderBuildx builder used to perform `docker buildx build` commands.(TEXT) [default: autodetect]                                          ---package-listOptional, contains comma-separated list of package ids that are processed for     +--package-listOptional, contains comma-separated list of package ids that are processed for     documentation building, and document publishing. It is an easier alternative to   adding individual packages as arguments to every command. This overrides the      packages passed as arguments.                                                     (TEXT)                                                                            ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---dry-run-DIf dry-run is set, commands are only printed, not executed. ---verbose-vPrint verbose information about performed steps. ---answer-aForce answer to questions.(y | n | q | yes | no | quit) ---help-hShow this message and exit. +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--verbose-vPrint verbose information about performed steps. 
+--answer-aForce answer to questions.(y | n | q | yes | no | quit) +--help-hShow this message and exit. ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/dev/breeze/doc/images/output_build-docs.txt b/dev/breeze/doc/images/output_build-docs.txt index 760b6b3d09826..85554fb426c0a 100644 --- a/dev/breeze/doc/images/output_build-docs.txt +++ b/dev/breeze/doc/images/output_build-docs.txt @@ -1 +1 @@ -ac6594538890f8fba65c916aa8672aa1 +91166ce4114ea9c162c139d2aff15886 diff --git a/dev/breeze/doc/images/output_prod-image.svg b/dev/breeze/doc/images/output_prod-image.svg index 6b907c07a6b27..ef8e95626d14a 100644 --- a/dev/breeze/doc/images/output_prod-image.svg +++ b/dev/breeze/doc/images/output_prod-image.svg @@ -98,7 +98,7 @@ Tools that developers can use to manually manage PROD images ╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---help-hShow this message and exit. +--help-hShow this message and exit. ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Production Image tools ─────────────────────────────────────────────────────────────────────────────────────────────╮ build  Build Production image. Include building multiple images for all or selected Python versions sequentially.  
diff --git a/dev/breeze/doc/images/output_prod-image.txt b/dev/breeze/doc/images/output_prod-image.txt index 4e4ac97bd602d..c767ee09d4fd3 100644 --- a/dev/breeze/doc/images/output_prod-image.txt +++ b/dev/breeze/doc/images/output_prod-image.txt @@ -1 +1 @@ -55030fe0d7718eb668fa1a37128647b0 +d91bcc76b14f186e749efe2c6aaa8682 diff --git a/dev/breeze/doc/images/output_prod-image_build.txt b/dev/breeze/doc/images/output_prod-image_build.txt index e1e2a2c9c6c7f..1645f4d547baa 100644 --- a/dev/breeze/doc/images/output_prod-image_build.txt +++ b/dev/breeze/doc/images/output_prod-image_build.txt @@ -1 +1 @@ -88290b22adcd4e5cc9da29aaa8467992 +c243f4de16bc858f6202d88922f00109 diff --git a/dev/breeze/doc/images/output_setup.svg b/dev/breeze/doc/images/output_setup.svg index c747a1eea7f38..5dda408adefbc 100644 --- a/dev/breeze/doc/images/output_setup.svg +++ b/dev/breeze/doc/images/output_setup.svg @@ -110,7 +110,7 @@ Tools that developers can use to configure Breeze ╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---help-hShow this message and exit. +--help-hShow this message and exit. ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Setup ──────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ autocomplete                   Enables autocompletion of breeze commands.                                          
diff --git a/dev/breeze/doc/images/output_setup.txt b/dev/breeze/doc/images/output_setup.txt index b8f9048b91f0b..274751197daaf 100644 --- a/dev/breeze/doc/images/output_setup.txt +++ b/dev/breeze/doc/images/output_setup.txt @@ -1 +1 @@ -d4a4f1b405f912fa234ff4116068290a +08c78d9dddd037a2ade6b751c5a22ff9 diff --git a/dev/breeze/doc/images/output_setup_autocomplete.svg b/dev/breeze/doc/images/output_setup_autocomplete.svg index e118e1fced9a8..31f7814001faa 100644 --- a/dev/breeze/doc/images/output_setup_autocomplete.svg +++ b/dev/breeze/doc/images/output_setup_autocomplete.svg @@ -102,13 +102,13 @@ Enables autocompletion of breeze commands. ╭─ Setup autocomplete flags ───────────────────────────────────────────────────────────────────────────────────────────╮ ---force-fForce autocomplete setup even if already setup before (overrides the setup). +--force-fForce autocomplete setup even if already setup before (overrides the setup). ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---verbose-vPrint verbose information about performed steps. ---dry-run-DIf dry-run is set, commands are only printed, not executed. ---answer-aForce answer to questions.(y | n | q | yes | no | quit) ---help-hShow this message and exit. +--verbose-vPrint verbose information about performed steps. +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--answer-aForce answer to questions.(y | n | q | yes | no | quit) +--help-hShow this message and exit. 
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/dev/breeze/doc/images/output_setup_autocomplete.txt b/dev/breeze/doc/images/output_setup_autocomplete.txt index 185feef026464..144c2613cd695 100644 --- a/dev/breeze/doc/images/output_setup_autocomplete.txt +++ b/dev/breeze/doc/images/output_setup_autocomplete.txt @@ -1 +1 @@ -fffcd49e102e09ccd69b3841a9e3ea8e +ec3b4541a478afe5cb86a6f1c48f50f5 diff --git a/dev/breeze/doc/images/output_setup_config.txt b/dev/breeze/doc/images/output_setup_config.txt index 3b2da9a9c043c..695e4b5c871eb 100644 --- a/dev/breeze/doc/images/output_setup_config.txt +++ b/dev/breeze/doc/images/output_setup_config.txt @@ -1 +1 @@ -96e10564034b282769a2c48ebf7176e2 +e77da96b508cc4911857d6f1266802b5 diff --git a/dev/breeze/doc/images/output_start-airflow.txt b/dev/breeze/doc/images/output_start-airflow.txt index 31367c64bfa37..428a70cf0c0a2 100644 --- a/dev/breeze/doc/images/output_start-airflow.txt +++ b/dev/breeze/doc/images/output_start-airflow.txt @@ -1 +1 @@ -2fdb4b01e6d949fb40993e3cc416ca5c +834ca1bef0a55889bfccfeb41738a2f6 diff --git a/dev/breeze/pyproject.toml b/dev/breeze/pyproject.toml index 32b3e1fbe6e32..e7bdbb4db08c6 100644 --- a/dev/breeze/pyproject.toml +++ b/dev/breeze/pyproject.toml @@ -48,7 +48,20 @@ dependencies = [ "black>=23.11.0", "click>=8.1.7", "filelock>=3.13.0", - "flit>=3.5.0", + # + # We pin flit in order to make sure reproducibility of provider packages is maintained + # It turns out that when packages are prepared metadata version in the produced packages + # is taken from the front-end not from the backend, so in order to make sure that the + # packages are reproducible, we should pin both backend in "build-system" and frontend in + # "dependencies" of the environment that is used to build the packages. 
+ # + # TODO(potiuk): automate bumping the version of flit in breeze and sync it with + # the version in the template for provider packages with pre-commit also add instructions in + # the source packages explaining that reproducibility can only be achieved by using the same + # version of flit front-end to build the package + # + "flit==3.10.1", + "flit-core==3.10.1", "gitpython>=3.1.40", "hatch==1.9.4", # Importib_resources 6.2.0-6.3.1 break pytest_rewrite diff --git a/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py b/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py index 8c5c449ed7e86..697526c1af3f8 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py +++ b/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py @@ -341,7 +341,6 @@ def remove_old_releases(version, repo_root): "--version", required=True, help="The release candidate version e.g. 2.4.3rc1", envvar="VERSION" ) def prepare_airflow_tarball(version: str): - check_python_version() from packaging.version import Version airflow_version = Version(version) diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py index ce9ec44e57960..9defbe7ef4d41 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py @@ -520,7 +520,6 @@ def prepare_airflow_packages( version_suffix_for_pypi: str, use_local_hatch: bool, ): - check_python_version() perform_environment_checks() fix_ownership_using_docker() cleanup_python_generated_files() @@ -3067,7 +3066,6 @@ def prepare_helm_chart_tarball( ) -> None: import yaml - check_python_version() chart_yaml_file_content = CHART_YAML_FILE.read_text() chart_yaml_dict = yaml.safe_load(chart_yaml_file_content) version_in_chart = chart_yaml_dict["version"] @@ -3209,8 +3207,6 @@ def prepare_helm_chart_tarball( 
@option_dry_run @option_verbose def prepare_helm_chart_package(sign_email: str): - check_python_version() - import yaml from airflow_breeze.utils.kubernetes_utils import ( diff --git a/dev/breeze/src/airflow_breeze/commands/setup_commands.py b/dev/breeze/src/airflow_breeze/commands/setup_commands.py index bc1ac4f1fa56b..f0d1e4eac7c94 100644 --- a/dev/breeze/src/airflow_breeze/commands/setup_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/setup_commands.py @@ -22,6 +22,7 @@ import shutil import subprocess import sys +import textwrap from copy import copy from pathlib import Path from typing import Any @@ -274,8 +275,31 @@ def get_status(file: str): get_console().print() -def dict_hash(dictionary: dict[str, Any]) -> str: +def dedent_help(dictionary: dict[str, Any]) -> None: + """ + Dedent help stored in the dictionary. + + Python 3.13 automatically dedents docstrings retrieved from functions. + See https://github.com/python/cpython/issues/81283 + + However, click uses docstrings in the absence of help strings, and we are using click + command definition dictionary hash to detect changes in the command definitions, so if the + help strings are not dedented, the hash will change. + + That's why we must de-dent all the help strings in the command definition dictionary + before we hash it. + """ + for key, value in dictionary.items(): + if isinstance(value, dict): + dedent_help(value) + elif key == "help" and isinstance(value, str): + dictionary[key] = textwrap.dedent(value) + + +def dict_hash(dictionary: dict[str, Any], dedent_help_strings: bool = True) -> str: """MD5 hash of a dictionary. 
Sorted and dumped via json to account for random sequence)""" + if dedent_help_strings: + dedent_help(dictionary) # noinspection InsecureHash dhash = hashlib.md5() try: diff --git a/dev/breeze/src/airflow_breeze/templates/pyproject_TEMPLATE.toml.jinja2 b/dev/breeze/src/airflow_breeze/templates/pyproject_TEMPLATE.toml.jinja2 index 389d2ce62e578..a375ffedc63ef 100644 --- a/dev/breeze/src/airflow_breeze/templates/pyproject_TEMPLATE.toml.jinja2 +++ b/dev/breeze/src/airflow_breeze/templates/pyproject_TEMPLATE.toml.jinja2 @@ -39,9 +39,8 @@ # IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE # `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY -# [build-system] -requires = ["flit_core >=3.2,<4"] +requires = ["flit_core==3.10.1"] build-backend = "flit_core.buildapi" [project] diff --git a/dev/breeze/src/airflow_breeze/utils/python_versions.py b/dev/breeze/src/airflow_breeze/utils/python_versions.py index b8807e66bf87c..d84c4f932ba8d 100644 --- a/dev/breeze/src/airflow_breeze/utils/python_versions.py +++ b/dev/breeze/src/airflow_breeze/utils/python_versions.py @@ -46,16 +46,10 @@ def get_python_version_list(python_versions: str) -> list[str]: def check_python_version(): - error = False if not sys.version_info >= (3, 9): get_console().print("[error]At least Python 3.9 is required to prepare reproducible archives.\n") - error = True - elif not sys.version_info < (3, 12): - get_console().print("[error]Python 3.12 is not supported.\n") - error = True - if error: get_console().print( - "[warning]Please reinstall Breeze using Python 3.9 - 3.11 environment.[/]\n\n" + "[warning]Please reinstall Breeze using Python 3.9 - 3.12 environment.[/]\n\n" "If you are using uv:\n\n" " uv tool install --force --reinstall --python 3.9 -e ./dev/breeze\n\n" "If you are using pipx:\n\n" diff --git a/dev/breeze/src/airflow_breeze/utils/reproducible.py b/dev/breeze/src/airflow_breeze/utils/reproducible.py index 1429333d64152..cf4005d9ddd10 
100644 --- a/dev/breeze/src/airflow_breeze/utils/reproducible.py +++ b/dev/breeze/src/airflow_breeze/utils/reproducible.py @@ -43,7 +43,6 @@ from subprocess import CalledProcessError, CompletedProcess from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT, OUT_DIR, REPRODUCIBLE_DIR -from airflow_breeze.utils.python_versions import check_python_version from airflow_breeze.utils.run_utils import run_command @@ -91,7 +90,6 @@ def reset(tarinfo): tarinfo.mtime = timestamp return tarinfo - check_python_version() OUT_DIR.mkdir(exist_ok=True) shutil.rmtree(REPRODUCIBLE_DIR, ignore_errors=True) REPRODUCIBLE_DIR.mkdir(exist_ok=True) @@ -149,7 +147,6 @@ def reset(tarinfo): def main(): - check_python_version() parser = ArgumentParser() parser.add_argument("-a", "--archive", help="archive to repack") parser.add_argument("-o", "--out", help="archive destination") diff --git a/dev/breeze/uv.lock b/dev/breeze/uv.lock index 666cb37805254..a5a252063646c 100644 --- a/dev/breeze/uv.lock +++ b/dev/breeze/uv.lock @@ -25,6 +25,7 @@ dependencies = [ { name = "click" }, { name = "filelock" }, { name = "flit" }, + { name = "flit-core" }, { name = "gitpython" }, { name = "hatch" }, { name = "importlib-resources", marker = "python_full_version < '3.9'" }, @@ -53,7 +54,8 @@ requires-dist = [ { name = "black", specifier = ">=23.11.0" }, { name = "click", specifier = ">=8.1.7" }, { name = "filelock", specifier = ">=3.13.0" }, - { name = "flit", specifier = ">=3.5.0" }, + { name = "flit", specifier = "==3.10.1" }, + { name = "flit-core", specifier = "==3.10.1" }, { name = "gitpython", specifier = ">=3.1.40" }, { name = "hatch", specifier = "==1.9.4" }, { name = "importlib-resources", marker = "python_full_version < '3.9'", specifier = ">=5.2,!=6.2.0,!=6.3.0,!=6.3.1" },