diff --git a/.gitignore b/.gitignore index d3f1810a46e48..e7fb2a5403dff 100644 --- a/.gitignore +++ b/.gitignore @@ -275,3 +275,9 @@ _api/ #while running go tests inside the go-sdk, it can generate log files for dags, ignore all logs go-sdk/**/*.log + +# E2e tests +_e2e_test_report.json + +# UV cache +.uv-cache/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0f66d77e41020..b3fd8a169f0cb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -763,6 +763,7 @@ repos: ^airflow-core/newsfragments/43349\.significant\.rst$| ^airflow-core/newsfragments/41368\.significant\.rst$| .*/dist/.*| + .rat-excludes| package-lock\.json$| ^providers/edge3/src/airflow/providers/edge3/plugins/www/pnpm-lock.yaml$ - id: check-base-operator-partial-arguments diff --git a/.rat-excludes b/.rat-excludes index 129d7210f07b1..9890235c23c1d 100644 --- a/.rat-excludes +++ b/.rat-excludes @@ -19,6 +19,7 @@ .rat-excludes .stylelintignore .stylelintrc +.env .venv requirements requirements.txt @@ -176,9 +177,150 @@ auth_generated.py www-hash.txt # go setup files -go.mod -go.sum -mocks/* +**/go.mod +**/go.sum +**/protov1/* + +# go mocks +**/mocks/* + +# Generated protobuf files +.*proto +.*pb.go +.*_grpc.pb.go # Kubernetes env .env + +# SVG files +**/*.svg + +# Doc only change marker file +**/.latest-doc-only-change.txt +**/*-gen/* + + +# Redirects +**/redirects.txt + +# Ignore files + +**/.git-blame-ignore-revs +**/.gitattributes +**/.rat-excludes +**/.gitignore +**/.prettierignore +**/.prettierrc +**/.airflowignore +**/.airflowignore_glob + + +# Vendor includes +**/_vendor/ + +# Generated files +**/*-generated.yaml +**/*-generated.py +**/generated.py +**/generated/* +**/auth_generated.py + +# Lock files +**/pnpm-lock.yaml +**/yarn.lock +**/Chart.lock +**/uv.lock + +# Generated UI files +**/ui/index.html +**/ui/dev/index.html +**/ui/dist/index.html +**/_private_ui.yaml +**/dist/** +**/www/index.html + +# PNG files +**/*.png + +# CSV files +**/*.csv + +# LICENCE 
files +**/LICENCE*.txt +**/LICENSE*.txt + + +# Checksum files +**/*.sha256 +**/*.md5sum + +# Requirement files +**/requirements.txt + +# Hashes +**/command_hashes.txt +**/www-hash.txt + +# Spelling wordlist +**/spelling_wordlist.txt +**/dictionary.txt + +# Empty files +**/empty.txt + +# Script files +**/script +**/script.bteq +**/script_utf16.bteq + +# Reproducible build files +**/reproducible_build.yaml + +# Other files +**/test_notifier.txt +**/email.html +**/*.log +**/example_upload.txt +**/dummy.pdf +**/java_streaming_src/* +**/kube_config +**/prod_image_installed_providers.txt +**/text.txt +**/newsfragments/** +**/warnings.txt +**/rtd-deprecation/404.html +**/.env +**/*.jsonl + +# API files +**/_api/** +**/node_modules/** + +# Doc files +/docs/.latest-doc-only-change.txt +/docs/redirects.txt +/docs/integration-logos/*.svg +/docs/img/*.md5sum +/docs/img/*.svg + +# Log files +*.log + +# md5 sum files +.*\.md5sum + +# Generated files +*generated.* +/src/airflow/providers/keycloak/auth_manager/openapi/v2-keycloak-auth-manager-generated.yaml +/src/airflow/providers/edge3/plugins/www/* +/src/airflow/providers/edge3/openapi/v2-edge-generated.yaml +/src/airflow/providers/fab/auth_manager/api_fastapi/openapi/v2-fab-auth-manager-generated.yaml +/src/airflow/providers/fab/www/static/dist/* +/any/dag_id=dag_for_testing_redis_task_handler/run_id=test/task_id=task_for_testing_redis_log_handler/attempt=1.log +/src/airflow/providers/google/ads/.gitignore + +# Vendored-in code +/src/airflow/providers/google/_vendor/* + +# Git ignore file +.gitignore diff --git a/dev/README_RELEASE_AIRFLOW.md b/dev/README_RELEASE_AIRFLOW.md index 70374c649086d..50d05e57408bd 100644 --- a/dev/README_RELEASE_AIRFLOW.md +++ b/dev/README_RELEASE_AIRFLOW.md @@ -614,11 +614,13 @@ you are checking): ```shell script VERSION=X.Y.Zrc1 +TASK_SDK_VERSION=X.Y.Zrc1 git fetch apache --tags git checkout ${VERSION} export AIRFLOW_REPO_ROOT=$(pwd) rm -rf dist/* breeze release-management 
prepare-airflow-distributions --distribution-format both +breeze release-management prepare-task-sdk-distributions --distribution-format both breeze release-management prepare-airflow-tarball --version ${VERSION} ``` @@ -628,6 +630,7 @@ will be done in a docker container. However, if you have `hatch` installed loc ```bash breeze release-management prepare-airflow-distributions --distribution-format both --use-local-hatch +breeze release-management prepare-task-sdk-distributions --distribution-format both --use-local-hatch breeze release-management prepare-airflow-tarball --version ${VERSION} ``` @@ -635,7 +638,7 @@ This is generally faster and requires less resources/network bandwidth. Note tha do it before preparing the tarball as preparing packages cleans up dist folder from apache-airflow artifacts as it uses hatch's `-c` build flag. -The `prepare-airflow-distributions` command (no matter if docker or local hatch is used) should produce the +The `prepare-*-distributions` commands (no matter if docker or local hatch is used) should produce the reproducible `.whl`, `.tar.gz` packages in the dist folder. The tarball command should produce reproducible `-source.tar.gz` tarball of sources. 
@@ -650,10 +653,16 @@ svn update --set-depth=infinity asf-dist/dev/airflow # Then compare the packages cd asf-dist/dev/airflow/${VERSION} -for i in ${AIRFLOW_REPO_ROOT}/dist/* +for i in *.whl *.tar.gz do - echo "Checking if $(basename $i) is the same as $i" - diff "$(basename $i)" "$i" && echo "OK" + echo "Checking if $(basename $i) is the same as ${AIRFLOW_REPO_ROOT}/dist/$(basename $i)" + diff "$(basename $i)" "${AIRFLOW_REPO_ROOT}/dist/$(basename $i)" && echo "OK" +done +cd ../task-sdk/${TASK_SDK_VERSION} +for i in *.whl *.tar.gz +do + echo "Checking if $(basename $i) is the same as ${AIRFLOW_REPO_ROOT}/dist/$(basename $i)" + diff "$(basename $i)" "${AIRFLOW_REPO_ROOT}/dist/$(basename $i)" && echo "OK" done ``` @@ -703,21 +712,35 @@ cd $AIRFLOW_REPO_ROOT/dev uv run check_files.py airflow -v ${VERSION} -p ${PATH_TO_SVN} ``` + +```shell script +cd $AIRFLOW_REPO_ROOT/dev +uv run check_files.py task-sdk -v ${TASK_SDK_VERSION} -p ${PATH_TO_SVN}/task-sdk +``` + ## Licence check This can be done with the Apache RAT tool. -* Download the latest jar from https://creadur.apache.org/rat/download_rat.cgi (unpack the binary, - the jar is inside) -* Unpack the release source archive (the `-source.tar.gz` file) to a folder -* Enter the sources folder run the check +Download the latest jar from https://creadur.apache.org/rat/download_rat.cgi (unpack the binary, the jar is inside) +wget -qO- https://dlcdn.apache.org//creadur/apache-rat-0.17/apache-rat-0.17-bin.tar.gz | gunzip | tar -C /tmp -xvf - + +Unpack the release source archive (the `-source.tar.gz` file) to a folder + +```shell script +rm -rf /tmp/apache-airflow-src && mkdir -p /tmp/apache-airflow-src && tar -xzf ${PATH_TO_SVN}/${VERSION}/apache-airflow-*-source.tar.gz -C /tmp/apache-airflow-src +``` + +Run the check: ```shell script -java -jar ../../apache-rat-0.13/apache-rat-0.13.jar -E .rat-excludes -d . 
+java -jar /tmp/apache-rat-0.17/apache-rat-0.17.jar --input-exclude-file ${AIRFLOW_REPO_ROOT}/.rat-excludes /tmp/apache-airflow-src | grep "! " ``` where `.rat-excludes` is the file in the root of Airflow source code. +You should see no files reported as Unknown or with wrong licence. + ## Signature check Make sure you have imported into your GPG the PGP key of the person signing the release. You can find the valid keys in diff --git a/dev/check_files.py b/dev/check_files.py index e7f16b5c612fd..69d243667a29c 100644 --- a/dev/check_files.py +++ b/dev/check_files.py @@ -46,6 +46,15 @@ """ +TASK_SDK_DOCKER = """\ +FROM python:3.10 + +# Upgrade +RUN pip install "apache-airflow-task-sdk=={}" + +""" + + DOCKER_UPGRADE = """\ FROM apache/airflow:1.10.15 @@ -131,7 +140,7 @@ def check_all_files(actual_files, expected_files): return missing_list -def check_release(files: list[str], version: str): +def check_airflow_release(files: list[str], version: str): print(f"Checking airflow release for version {version}:\n") version = strip_rc_suffix(version) @@ -147,6 +156,19 @@ def check_release(files: list[str], version: str): return check_all_files(expected_files=expected_files, actual_files=files) +def check_task_sdk_release(files: list[str], version: str): + print(f"Checking task-sdk release for version {version}:\n") + version = strip_rc_suffix(version) + + expected_files = expand_name_variations( + [ + f"apache_airflow_task_sdk-{version}.tar.gz", + f"apache_airflow_task_sdk-{version}-py3-none-any.whl", + ] + ) + return check_all_files(expected_files=expected_files, actual_files=files) + + def expand_name_variations(files): return sorted(base + suffix for base, suffix in itertools.product(files, ["", ".asc", ".sha512"])) @@ -225,13 +247,26 @@ def providers(ctx, path: str): @click.pass_context def airflow(ctx, path: str, version: str): files = os.listdir(os.path.join(path, version)) - missing_files = check_release(files, version) + missing_files = check_airflow_release(files, 
version) create_docker(AIRFLOW_DOCKER.format(version)) if missing_files: warn_of_missing_files(missing_files) return +@click.command(name="task-sdk") +@path_option +@version_option +@click.pass_context +def task_sdk(ctx, path: str, version: str): + files = os.listdir(os.path.join(path, version)) + missing_files = check_task_sdk_release(files, version) + create_docker(TASK_SDK_DOCKER.format(version)) + if missing_files: + warn_of_missing_files(missing_files) + return + + @click.command() @path_option @version_option @@ -248,6 +283,7 @@ def upgrade_check(ctx, path: str, version: str): cli.add_command(providers) cli.add_command(airflow) +cli.add_command(task_sdk) cli.add_command(upgrade_check) if __name__ == "__main__": @@ -273,7 +309,7 @@ def test_check_release_pass(): "apache_airflow_core-2.8.1.tar.gz.asc", "apache_airflow_core-2.8.1.tar.gz.sha512", ] - assert check_release(files, version="2.8.1rc2") == [] + assert check_airflow_release(files, version="2.8.1rc2") == [] def test_check_release_fail(): @@ -294,7 +330,7 @@ def test_check_release_fail(): "apache_airflow_core-2.8.1.tar.gz.sha512", ] - missing_files = check_release(files, version="2.8.1rc2") + missing_files = check_airflow_release(files, version="2.8.1rc2") assert missing_files == ["apache_airflow-2.8.1.tar.gz", "apache_airflow_core-2.8.1.tar.gz"] diff --git a/scripts/ci/dockerfiles/apache-rat/build_and_push.sh b/scripts/ci/dockerfiles/apache-rat/build_and_push.sh index ddd004d9337e5..0980c121d6c92 100755 --- a/scripts/ci/dockerfiles/apache-rat/build_and_push.sh +++ b/scripts/ci/dockerfiles/apache-rat/build_and_push.sh @@ -19,10 +19,10 @@ set -euo pipefail GITHUB_REPOSITORY=${GITHUB_REPOSITORY:="apache/airflow"} readonly GITHUB_REPOSITORY -APACHERAT_VERSION="0.16.1" +APACHERAT_VERSION="0.17" readonly APACHERAT_VERSION -AIRFLOW_APACHERAT_VERSION="2024.03.23" +AIRFLOW_APACHERAT_VERSION="2025.10.24" readonly AIRFLOW_APACHERAT_VERSION COMMIT_SHA=$(git rev-parse HEAD) diff --git 
a/scripts/ci/prek/check_license.py b/scripts/ci/prek/check_license.py index c60d6bc9fa92b..bb50dca43ab9a 100755 --- a/scripts/ci/prek/check_license.py +++ b/scripts/ci/prek/check_license.py @@ -37,11 +37,10 @@ "--user", f"{os.getuid()}:{os.getgid()}", "--rm", - "ghcr.io/apache/airflow-apache-rat:0.16.1-2024.03.23@sha256:83c4d2610ec4a439d1809a67fadbdc9a1df089ab130b32209351bdd4527a3f02", - "-d", - "/opt/airflow", - "--exclude-file", + "ghcr.io/apache/airflow-apache-rat:0.17-2025.10.24@sha256:63e965ecfa195d38cf0525b16ad801dff75833ee97d88cd763020537c36981c9", + "--input-exclude-file", "/opt/airflow/.rat-excludes", + "/opt/airflow", ] print("Running command:") @@ -56,7 +55,10 @@ output = result.stdout if result.returncode != 0: print(f"\033[0;31mERROR: {result.returncode} when running rat\033[0m\n") - print(output) + lines = output.splitlines() + for line in lines: + if "! " in line: + print(line) sys.exit(result.returncode) unknown_licences = [line for line in output.splitlines() if "??" in line] if unknown_licences: diff --git a/task-sdk/.gitignore b/task-sdk/.gitignore new file mode 100644 index 0000000000000..0da25061558ac --- /dev/null +++ b/task-sdk/.gitignore @@ -0,0 +1,14 @@ +# Potentially created files +.uv-cache +dist +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg