diff --git a/.github/actions/checkout_target_commit/action.yml b/.github/actions/checkout_target_commit/action.yml index e90ae0199804c..e95e8b86254a0 100644 --- a/.github/actions/checkout_target_commit/action.yml +++ b/.github/actions/checkout_target_commit/action.yml @@ -65,13 +65,16 @@ runs: rm -rfv "dev" rm -rfv ".github/actions" rm -rfv ".github/workflows" + rm -v ".dockerignore" || true mv -v "target-airflow/scripts/ci" "scripts" mv -v "target-airflow/dev" "." mv -v "target-airflow/.github/actions" "target-airflow/.github/workflows" ".github" + mv -v "target-airflow/.dockerignore" ".dockerignore" || true if: inputs.pull-request-target == 'true' && inputs.is-committer-build != 'true' #################################################################################################### - # AFTER IT'S SAFE. THE `dev`, `scripts/ci` AND `.github/actions` ARE NOW COMING FROM THE - # BASE_REF - WHICH IS THE TARGET BRANCH OF THE PR. WE CAN TRUST THAT THOSE SCRIPTS ARE SAFE TO RUN. + # AFTER IT'S SAFE. THE `dev`, `scripts/ci` AND `.github/actions` and `.dockerignore` ARE NOW COMING + # FROM THE BASE_REF - WHICH IS THE TARGET BRANCH OF THE PR. WE CAN TRUST THAT THOSE SCRIPTS ARE + # SAFE TO RUN AND CODE AVAILABLE IN THE DOCKER BUILD PHASE IS CONTROLLED BY THE `.dockerignore`. # ALL THE REST OF THE CODE COMES FROM THE PR, AND FOR EXAMPLE THE CODE IN THE `Dockerfile.ci` CAN # BE RUN SAFELY AS PART OF DOCKER BUILD. BECAUSE IT RUNS INSIDE THE DOCKER CONTAINER AND IT IS # ISOLATED FROM THE RUNNER. diff --git a/.github/actions/install-pre-commit/action.yml b/.github/actions/install-pre-commit/action.yml new file mode 100644 index 0000000000000..02eea2c722917 --- /dev/null +++ b/.github/actions/install-pre-commit/action.yml @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +--- +name: 'Install pre-commit' +description: 'Installs pre-commit and related packages' +inputs: + python-version: + description: 'Python version to use' + default: 3.9 + uv-version: + description: 'uv version to use' + default: 0.4.29 + pre-commit-version: + description: 'pre-commit version to use' + default: 4.0.1 + pre-commit-uv-version: + description: 'pre-commit-uv version to use' + default: 4.1.4 +runs: + using: "composite" + steps: + - name: Install pre-commit, uv, and pre-commit-uv + shell: bash + run: > + pip install + pre-commit==${{inputs.pre-commit-version}} + uv==${{inputs.uv-version}} + pre-commit-uv==${{inputs.pre-commit-uv-version}} + - name: Cache pre-commit envs + uses: actions/cache@v4 + with: + path: ~/.cache/pre-commit + key: "pre-commit-${{inputs.python-version}}-${{ hashFiles('.pre-commit-config.yaml') }}" + restore-keys: | + pre-commit-${{inputs.python-version}}- diff --git a/.github/workflows/additional-ci-image-checks.yml b/.github/workflows/additional-ci-image-checks.yml index ae9efdb6b0340..82b143d2f03e4 100644 --- a/.github/workflows/additional-ci-image-checks.yml +++ b/.github/workflows/additional-ci-image-checks.yml @@ -80,6 +80,10 @@ on: # yamllint disable-line rule:truthy description: "Whether to debug resources (true/false)" required: true type: string + use-uv: + description: "Whether to use uv to build the image (true/false)" + required: true + type: string jobs: # Push early BuildX cache to GitHub Registry in Apache repository, This cache does not wait for all the # tests to complete - it is run very early in the build process for "main" merges in order to refresh @@ -109,7 +113,7 @@ jobs: python-versions: ${{ inputs.python-versions }} branch: ${{ inputs.branch }} constraints-branch: ${{ inputs.constraints-branch }} - use-uv: "true" + use-uv: ${{ inputs.use-uv}} include-success-outputs: ${{ inputs.include-success-outputs }} docker-cache: ${{ inputs.docker-cache }} if: inputs.branch == 'main' @@ -165,6 +169,6 @@ jobs: # platform: "linux/arm64" # branch: ${{ inputs.branch }} # constraints-branch: ${{ inputs.constraints-branch }} -# use-uv: "true" +# use-uv: ${{ inputs.use-uv}} # upgrade-to-newer-dependencies: ${{ inputs.upgrade-to-newer-dependencies }} # docker-cache: ${{ inputs.docker-cache }} diff --git a/.github/workflows/basic-tests.yml b/.github/workflows/basic-tests.yml index 7ab09d1cd2fec..5141feae22380 100644 --- a/.github/workflows/basic-tests.yml +++ b/.github/workflows/basic-tests.yml @@ -232,16 +232,11 @@ jobs: - name: "Install Breeze" uses: ./.github/actions/breeze id: breeze - - name: Cache pre-commit envs - uses: actions/cache@v4 + - name: "Install pre-commit" + uses: ./.github/actions/install-pre-commit + id: pre-commit with: - path: ~/.cache/pre-commit - # yamllint disable-line rule:line-length - key: "pre-commit-${{steps.breeze.outputs.host-python-version}}-${{ hashFiles('.pre-commit-config.yaml') }}" - restore-keys: "\ - pre-commit-${{steps.breeze.outputs.host-python-version}}-\ - ${{ hashFiles('.pre-commit-config.yaml') }}\n - pre-commit-${{steps.breeze.outputs.host-python-version}}-" + python-version: ${{steps.breeze.outputs.host-python-version}} - name: Fetch incoming commit ${{ github.sha }} with its parent uses: actions/checkout@v4 with: diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml index abf966faede02..55e6c5d2018b9 100644 --- a/.github/workflows/build-images.yml +++ b/.github/workflows/build-images.yml @@ -16,7 +16,7 @@ # under the License. 
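Side note on the `install-pre-commit` composite action added above: it pins pre-commit, uv and pre-commit-uv, and caches `~/.cache/pre-commit` under a key derived from the Python version and the hash of `.pre-commit-config.yaml`, with a version-only prefix as the restore fallback. A rough Python sketch of that key scheme, purely illustrative (hashlib's SHA-256 stands in for GitHub's `hashFiles()` and the helper name is made up):

```python
import hashlib
from pathlib import Path


def pre_commit_cache_keys(python_version: str, config: str = ".pre-commit-config.yaml"):
    """Illustrate the cache key / restore-keys scheme used by the composite action."""
    digest = hashlib.sha256(Path(config).read_bytes()).hexdigest()
    primary = f"pre-commit-{python_version}-{digest}"  # exact hit when the config is unchanged
    fallback = f"pre-commit-{python_version}-"         # prefix hit: any cache for this Python version
    return primary, fallback
```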
# --- -name: "Build Images" +name: Build Images run-name: > Build images for ${{ github.event.pull_request.title }} ${{ github.event.pull_request._links.html.href }} on: # yamllint disable-line rule:truthy @@ -54,7 +54,7 @@ concurrency: jobs: build-info: timeout-minutes: 10 - name: "Build Info" + name: Build Info # At build-info stage we do not yet have outputs so we need to hard-code the runs-on to public runners runs-on: ["ubuntu-22.04"] env: @@ -71,6 +71,7 @@ jobs: prod-image-build: ${{ steps.selective-checks.outputs.prod-image-build }} docker-cache: ${{ steps.selective-checks.outputs.docker-cache }} default-branch: ${{ steps.selective-checks.outputs.default-branch }} + force-pip: ${{ steps.selective-checks.outputs.force-pip }} constraints-branch: ${{ steps.selective-checks.outputs.default-constraints-branch }} runs-on-as-json-default: ${{ steps.selective-checks.outputs.runs-on-as-json-default }} runs-on-as-json-public: ${{ steps.selective-checks.outputs.runs-on-as-json-public }} @@ -89,7 +90,7 @@ jobs: }}" if: github.repository == 'apache/airflow' steps: - - name: "Cleanup repo" + - name: Cleanup repo shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: Discover PR merge commit @@ -154,13 +155,13 @@ jobs: # COMPOSITE ACTIONS. WE CAN RUN ANYTHING THAT IS IN THE TARGET BRANCH AND THERE IS NO RISK THAT # CODE WILL BE RUN FROM THE PR. #################################################################################################### - - name: "Cleanup docker" + - name: Cleanup docker run: ./scripts/ci/cleanup_docker.sh - - name: "Setup python" + - name: Setup python uses: actions/setup-python@v5 with: - python-version: 3.8 - - name: "Install Breeze" + python-version: "3.9" + - name: Install Breeze uses: ./.github/actions/breeze #################################################################################################### # WE RUN SELECTIVE CHECKS HERE USING THE TARGET COMMIT AND ITS PARENT TO BE ABLE TO COMPARE THEM @@ -202,7 +203,7 @@ jobs: pull-request-target: "true" is-committer-build: ${{ needs.build-info.outputs.is-committer-build }} push-image: "true" - use-uv: "true" + use-uv: ${{ needs.build-info.outputs.force-pip && 'false' || 'true' }} image-tag: ${{ needs.build-info.outputs.image-tag }} platform: "linux/amd64" python-versions: ${{ needs.build-info.outputs.python-versions }} @@ -212,7 +213,7 @@ jobs: docker-cache: ${{ needs.build-info.outputs.docker-cache }} generate-constraints: - name: "Generate constraints" + name: Generate constraints needs: [build-info, build-ci-images] uses: ./.github/workflows/generate-constraints.yml with: @@ -245,9 +246,9 @@ jobs: pull-request-target: "true" is-committer-build: ${{ needs.build-info.outputs.is-committer-build }} push-image: "true" - use-uv: "true" + use-uv: ${{ needs.build-info.outputs.force-pip && 'false' || 'true' }} image-tag: ${{ needs.build-info.outputs.image-tag }} - platform: "linux/amd64" + platform: linux/amd64 python-versions: ${{ needs.build-info.outputs.python-versions }} default-python-version: ${{ needs.build-info.outputs.default-python-version }} branch: ${{ needs.build-info.outputs.default-branch }} diff --git a/.github/workflows/check-providers.yml b/.github/workflows/check-providers.yml index 622a67fea97a1..e89d4a81faaca 100644 --- a/.github/workflows/check-providers.yml +++ b/.github/workflows/check-providers.yml @@ -28,6 +28,10 @@ on: # yamllint disable-line rule:truthy description: "Tag to set for the image" required: true type: string + canary-run: + 
description: "Whether this is a canary run" + required: true + type: string default-python-version: description: "Which version of python should be used by default" required: true @@ -209,6 +213,7 @@ jobs: PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" VERSION_SUFFIX_FOR_PYPI: "dev0" VERBOSE: "true" + CLEAN_AIRFLOW_INSTALLATION: "${{ inputs.canary-run }}" if: inputs.skip-provider-tests != 'true' steps: - name: "Cleanup repo" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 866f8f253d401..4da5ca6c8ae8b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -75,6 +75,7 @@ jobs: default-mysql-version: ${{ steps.selective-checks.outputs.default-mysql-version }} default-helm-version: ${{ steps.selective-checks.outputs.default-helm-version }} default-kind-version: ${{ steps.selective-checks.outputs.default-kind-version }} + force-pip: ${{ steps.selective-checks.outputs.force-pip }} full-tests-needed: ${{ steps.selective-checks.outputs.full-tests-needed }} parallel-test-types-list-as-string: >- ${{ steps.selective-checks.outputs.parallel-test-types-list-as-string }} @@ -95,7 +96,7 @@ jobs: ci-image-build: ${{ steps.selective-checks.outputs.ci-image-build }} prod-image-build: ${{ steps.selective-checks.outputs.prod-image-build }} docs-build: ${{ steps.selective-checks.outputs.docs-build }} - mypy-folders: ${{ steps.selective-checks.outputs.mypy-folders }} + mypy-checks: ${{ steps.selective-checks.outputs.mypy-checks }} needs-mypy: ${{ steps.selective-checks.outputs.needs-mypy }} needs-helm-tests: ${{ steps.selective-checks.outputs.needs-helm-tests }} needs-api-tests: ${{ steps.selective-checks.outputs.needs-api-tests }} @@ -199,7 +200,7 @@ jobs: platform: "linux/amd64" python-versions: ${{ needs.build-info.outputs.python-versions }} branch: ${{ needs.build-info.outputs.default-branch }} - use-uv: "true" + use-uv: ${{ needs.build-info.outputs.force-pip && 'false' || 'true' }} upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} constraints-branch: ${{ needs.build-info.outputs.default-constraints-branch }} docker-cache: ${{ needs.build-info.outputs.docker-cache }} @@ -263,6 +264,7 @@ jobs: latest-versions-only: ${{ needs.build-info.outputs.latest-versions-only }} include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} debug-resources: ${{ needs.build-info.outputs.debug-resources }} + use-uv: ${{ needs.build-info.outputs.force-pip && 'false' || 'true' }} generate-constraints: @@ -290,7 +292,7 @@ jobs: runs-on-as-json-docs-build: ${{ needs.build-info.outputs.runs-on-as-json-docs-build }} image-tag: ${{ needs.build-info.outputs.image-tag }} needs-mypy: ${{ needs.build-info.outputs.needs-mypy }} - mypy-folders: ${{ needs.build-info.outputs.mypy-folders }} + mypy-checks: ${{ needs.build-info.outputs.mypy-checks }} python-versions-list-as-string: ${{ needs.build-info.outputs.python-versions-list-as-string }} branch: ${{ needs.build-info.outputs.default-branch }} canary-run: ${{ needs.build-info.outputs.canary-run }} @@ -304,6 +306,7 @@ jobs: ci-image-build: ${{ needs.build-info.outputs.ci-image-build }} include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} debug-resources: ${{ needs.build-info.outputs.debug-resources }} + docs-build: ${{ needs.build-info.outputs.docs-build }} providers: name: "Provider checks" @@ -319,6 +322,7 @@ jobs: with: runs-on-as-json-default: ${{ needs.build-info.outputs.runs-on-as-json-default }} image-tag: ${{ 
needs.build-info.outputs.image-tag }} + canary-run: ${{ needs.build-info.outputs.canary-run }} default-python-version: ${{ needs.build-info.outputs.default-python-version }} upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} affected-providers-list-as-string: ${{ needs.build-info.outputs.affected-providers-list-as-string }} @@ -541,7 +545,7 @@ jobs: default-python-version: ${{ needs.build-info.outputs.default-python-version }} branch: ${{ needs.build-info.outputs.default-branch }} push-image: "true" - use-uv: "true" + use-uv: ${{ needs.build-info.outputs.force-pip && 'false' || 'true' }} build-provider-packages: ${{ needs.build-info.outputs.default-branch == 'main' }} upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }} @@ -622,6 +626,7 @@ jobs: kubernetes-versions-list-as-string: ${{ needs.build-info.outputs.kubernetes-versions-list-as-string }} kubernetes-combos-list-as-string: ${{ needs.build-info.outputs.kubernetes-combos-list-as-string }} include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} + use-uv: ${{ needs.build-info.outputs.force-pip && 'false' || 'true' }} debug-resources: ${{ needs.build-info.outputs.debug-resources }} if: > ( needs.build-info.outputs.run-kubernetes-tests == 'true' || diff --git a/.github/workflows/finalize-tests.yml b/.github/workflows/finalize-tests.yml index 8b392ba204664..a460dbe151a30 100644 --- a/.github/workflows/finalize-tests.yml +++ b/.github/workflows/finalize-tests.yml @@ -145,7 +145,7 @@ jobs: python-versions: ${{ inputs.python-versions }} branch: ${{ inputs.branch }} constraints-branch: ${{ inputs.constraints-branch }} - use-uv: "true" + use-uv: ${{ needs.build-info.outputs.force-pip && 'false' || 'true' }} include-success-outputs: ${{ inputs.include-success-outputs }} docker-cache: ${{ inputs.docker-cache }} if: inputs.canary-run == 'true' diff --git a/.github/workflows/generate-constraints.yml b/.github/workflows/generate-constraints.yml index 207fd4339c8db..d6e536dfd091a 100644 --- a/.github/workflows/generate-constraints.yml +++ b/.github/workflows/generate-constraints.yml @@ -95,7 +95,7 @@ jobs: timeout-minutes: 25 run: > breeze release-management generate-constraints --run-in-parallel - --airflow-constraints-mode constraints-no-providers --answer yes + --airflow-constraints-mode constraints-no-providers --answer yes --parallelism 3 # The no providers constraints are only needed when we want to update constraints (in canary builds) # They slow down the start of PROD image builds so we want to only run them when needed. 
if: inputs.generate-no-providers-constraints == 'true' @@ -115,7 +115,7 @@ jobs: run: > breeze release-management generate-constraints --run-in-parallel --airflow-constraints-mode constraints --answer yes - --chicken-egg-providers "${{ inputs.chicken-egg-providers }}" + --chicken-egg-providers "${{ inputs.chicken-egg-providers }}" --parallelism 3 - name: "Dependency upgrade summary" shell: bash run: | diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index e831350f5b186..530d0f9fc5636 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -59,6 +59,7 @@ on: # yamllint disable-line rule:truthy jobs: tests-integration: timeout-minutes: 130 + if: inputs.testable-integrations != '[]' name: "Integration Tests: ${{ matrix.integration }}" runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }} strategy: diff --git a/.github/workflows/k8s-tests.yml b/.github/workflows/k8s-tests.yml index c4b72a9afc924..3b3e067038db9 100644 --- a/.github/workflows/k8s-tests.yml +++ b/.github/workflows/k8s-tests.yml @@ -44,6 +44,10 @@ on: # yamllint disable-line rule:truthy description: "Whether to include success outputs" required: true type: string + use-uv: + description: "Whether to use uv" + required: true + type: string debug-resources: description: "Whether to debug resources" required: true @@ -96,6 +100,9 @@ jobs: key: "\ k8s-env-${{ steps.breeze.outputs.host-python-version }}-\ ${{ hashFiles('scripts/ci/kubernetes/k8s_requirements.txt','hatch_build.py') }}" + - name: "Switch breeze to use uv" + run: breeze setup config --use-uv + if: inputs.use-uv == 'true' - name: Run complete K8S tests ${{ inputs.kubernetes-combos-list-as-string }} run: breeze k8s run-complete-tests --run-in-parallel --upgrade --no-copy-local-sources env: diff --git a/.github/workflows/release_dockerhub_image.yml b/.github/workflows/release_dockerhub_image.yml index 100e850a6fd84..5ce1585131f76 100644 --- a/.github/workflows/release_dockerhub_image.yml +++ b/.github/workflows/release_dockerhub_image.yml @@ -85,6 +85,7 @@ jobs: "kaxil", "pierrejeambrun", "potiuk", + "utkarsharma2" ]'), github.event.sender.login) steps: - name: "Cleanup repo" diff --git a/.github/workflows/static-checks-mypy-docs.yml b/.github/workflows/static-checks-mypy-docs.yml index 9a1e4ac4ac7f9..be2c4f8e28645 100644 --- a/.github/workflows/static-checks-mypy-docs.yml +++ b/.github/workflows/static-checks-mypy-docs.yml @@ -36,7 +36,7 @@ on: # yamllint disable-line rule:truthy description: "Whether to run mypy checks (true/false)" required: true type: string - mypy-folders: + mypy-checks: description: "List of folders to run mypy checks on" required: false type: string @@ -92,6 +92,10 @@ on: # yamllint disable-line rule:truthy description: "Whether to debug resources (true/false)" required: true type: string + docs-build: + description: "Whether to build docs (true/false)" + required: true + type: string jobs: static-checks: timeout-minutes: 45 @@ -122,14 +126,11 @@ jobs: - name: "Prepare breeze & CI image: ${{ inputs.default-python-version}}:${{ inputs.image-tag }}" uses: ./.github/actions/prepare_breeze_and_image id: breeze - - name: Cache pre-commit envs - uses: actions/cache@v4 + - name: "Install pre-commit" + uses: ./.github/actions/install-pre-commit + id: pre-commit with: - path: ~/.cache/pre-commit - # yamllint disable-line rule:line-length - key: "pre-commit-${{steps.breeze.outputs.host-python-version}}-${{ hashFiles('.pre-commit-config.yaml') }}" - restore-keys: | - 
pre-commit-${{steps.breeze.outputs.host-python-version}}- + python-version: ${{steps.breeze.outputs.host-python-version}} - name: "Static checks" run: breeze static-checks --all-files --show-diff-on-failure --color always --initialize-environment env: @@ -148,7 +149,7 @@ jobs: strategy: fail-fast: false matrix: - mypy-folder: ${{ fromJSON(inputs.mypy-folders) }} + mypy-check: ${{ fromJSON(inputs.mypy-checks) }} env: PYTHON_MAJOR_MINOR_VERSION: "${{inputs.default-python-version}}" IMAGE_TAG: "${{ inputs.image-tag }}" @@ -166,10 +167,13 @@ jobs: - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}:${{ inputs.image-tag }}" uses: ./.github/actions/prepare_breeze_and_image id: breeze - - name: "MyPy checks for ${{ matrix.mypy-folder }}" - run: | - pip install pre-commit - pre-commit run --color always --verbose --hook-stage manual mypy-${{matrix.mypy-folder}} --all-files + - name: "Install pre-commit" + uses: ./.github/actions/install-pre-commit + id: pre-commit + with: + python-version: ${{steps.breeze.outputs.host-python-version}} + - name: "MyPy checks for ${{ matrix.mypy-check }}" + run: pre-commit run --color always --verbose --hook-stage manual ${{matrix.mypy-check}} --all-files env: VERBOSE: "false" COLUMNS: "250" @@ -182,6 +186,7 @@ jobs: timeout-minutes: 150 name: "Build documentation" runs-on: ${{ fromJSON(inputs.runs-on-as-json-default) }} + if: inputs.docs-build == 'true' strategy: fail-fast: false matrix: @@ -231,8 +236,6 @@ jobs: timeout-minutes: 150 name: "Publish documentation" needs: build-docs - # For canary runs we need to push documentation to AWS S3 and preparing it takes a lot of space - # So we should use self-hosted ASF runners for this runs-on: ${{ fromJSON(inputs.runs-on-as-json-docs-build) }} env: GITHUB_REPOSITORY: ${{ github.repository }} @@ -259,16 +262,22 @@ jobs: with: name: airflow-docs path: './docs/_build' + - name: Check disk space available + run: df -h + - name: Create /mnt/airflow-site directory + run: sudo mkdir -p /mnt/airflow-site && sudo chown -R "${USER}" /mnt/airflow-site - name: "Clone airflow-site" run: > - git clone https://github.com/apache/airflow-site.git ${GITHUB_WORKSPACE}/airflow-site && - echo "AIRFLOW_SITE_DIRECTORY=${GITHUB_WORKSPACE}/airflow-site" >> "$GITHUB_ENV" + git clone https://github.com/apache/airflow-site.git /mnt/airflow-site/airflow-site && + echo "AIRFLOW_SITE_DIRECTORY=/mnt/airflow-site/airflow-site" >> "$GITHUB_ENV" - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}:${{ inputs.image-tag }}" uses: ./.github/actions/prepare_breeze_and_image - name: "Publish docs" run: > breeze release-management publish-docs --override-versioned --run-in-parallel ${{ inputs.docs-list-as-string }} + - name: Check disk space available + run: df -h - name: "Generate back references for providers" run: breeze release-management add-back-references all-providers - name: "Generate back references for apache-airflow" diff --git a/.gitignore b/.gitignore index 0c94718749f57..5a1a0446b74d6 100644 --- a/.gitignore +++ b/.gitignore @@ -247,7 +247,5 @@ licenses/LICENSES-ui.txt # airflow-build-dockerfile and correconding ignore file airflow-build-dockerfile* -# Airflow 3 files -# These directories are ignored so someone can develop on both of them without deleting files manually -airflow/ui -task_sdk +# Temporary ignore uv.lock until we integrate it fully in our constraint preparation mechanism +/uv.lock diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c6a19314521b8..bb6c37ca4d598 100644 --- 
a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -467,21 +467,21 @@ repos: files: ^docs/apache-airflow/extra-packages-ref\.rst$|^hatch_build.py pass_filenames: false entry: ./scripts/ci/pre_commit/check_extra_packages_ref.py - additional_dependencies: ['rich>=12.4.4', 'hatchling==1.25.0', 'tabulate'] + additional_dependencies: ['rich>=12.4.4', 'hatchling==1.26.3', 'tabulate'] - id: check-hatch-build-order name: Check order of dependencies in hatch_build.py language: python files: ^hatch_build.py$ pass_filenames: false entry: ./scripts/ci/pre_commit/check_order_hatch_build.py - additional_dependencies: ['rich>=12.4.4', 'hatchling==1.25.0'] + additional_dependencies: ['rich>=12.4.4', 'hatchling==1.26.3'] - id: update-extras name: Update extras in documentation entry: ./scripts/ci/pre_commit/insert_extras.py language: python files: ^contributing-docs/12_airflow_dependencies_and_extras.rst$|^INSTALL$|^airflow/providers/.*/provider\.yaml$|^Dockerfile.* pass_filenames: false - additional_dependencies: ['rich>=12.4.4', 'hatchling==1.25.0'] + additional_dependencies: ['rich>=12.4.4', 'hatchling==1.26.3'] - id: check-extras-order name: Check order of extras in Dockerfile entry: ./scripts/ci/pre_commit/check_order_dockerfile_extras.py diff --git a/Dockerfile b/Dockerfile index cf5226c00086f..4cdf1a8bb3409 100644 --- a/Dockerfile +++ b/Dockerfile @@ -49,8 +49,8 @@ ARG AIRFLOW_VERSION="2.9.3" ARG PYTHON_BASE_IMAGE="python:3.8-slim-bookworm" -ARG AIRFLOW_PIP_VERSION=24.2 -ARG AIRFLOW_UV_VERSION=0.4.1 +ARG AIRFLOW_PIP_VERSION=24.3.1 +ARG AIRFLOW_UV_VERSION=0.4.29 ARG AIRFLOW_USE_UV="false" ARG UV_HTTP_TIMEOUT="300" ARG AIRFLOW_IMAGE_REPOSITORY="https://github.com/apache/airflow" diff --git a/Dockerfile.ci b/Dockerfile.ci index d23e810fa3677..e188a7ec39115 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -1297,8 +1297,8 @@ ARG DEFAULT_CONSTRAINTS_BRANCH="constraints-main" # It can also be overwritten manually by setting the AIRFLOW_CI_BUILD_EPOCH environment variable. 
ARG AIRFLOW_CI_BUILD_EPOCH="10" ARG AIRFLOW_PRE_CACHED_PIP_PACKAGES="true" -ARG AIRFLOW_PIP_VERSION=24.2 -ARG AIRFLOW_UV_VERSION=0.4.1 +ARG AIRFLOW_PIP_VERSION=24.3.1 +ARG AIRFLOW_UV_VERSION=0.4.29 ARG AIRFLOW_USE_UV="true" # Setup PIP # By default PIP install run without cache to make image smaller @@ -1321,8 +1321,8 @@ ARG AIRFLOW_VERSION="" # Additional PIP flags passed to all pip install commands except reinstalling pip itself ARG ADDITIONAL_PIP_INSTALL_FLAGS="" -ARG AIRFLOW_PIP_VERSION=24.2 -ARG AIRFLOW_UV_VERSION=0.4.1 +ARG AIRFLOW_PIP_VERSION=24.3.1 +ARG AIRFLOW_UV_VERSION=0.4.29 ARG AIRFLOW_USE_UV="true" ENV AIRFLOW_REPO=${AIRFLOW_REPO}\ diff --git a/README.md b/README.md index 754ed9341739d..8da91a71f9ad8 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ # Apache Airflow [![PyPI version](https://badge.fury.io/py/apache-airflow.svg)](https://badge.fury.io/py/apache-airflow) -[![GitHub Build](https://github.com/apache/airflow/workflows/Tests/badge.svg)](https://github.com/apache/airflow/actions) +[![GitHub Build](https://github.com/apache/airflow/actions/workflows/ci.yml/badge.svg?branch=v2-10-test)](https://github.com/apache/airflow/actions/workflows/ci.yml?query=branch%3Av2-10-test) [![Coverage Status](https://codecov.io/gh/apache/airflow/graph/badge.svg?token=WdLKlKHOAU)](https://codecov.io/gh/apache/airflow) [![License](https://img.shields.io/:license-Apache%202-blue.svg)](https://www.apache.org/licenses/LICENSE-2.0.txt) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/apache-airflow.svg)](https://pypi.org/project/apache-airflow/) @@ -97,9 +97,9 @@ Airflow is not a streaming solution, but it is often used to process real-time d Apache Airflow is tested with: -| | Main version (dev) | Stable version (2.10.3) | +| | Main version (dev) | Stable version (2.10.4) | |-------------|------------------------------|------------------------------| -| Python | 3.8, 3.9, 3.10, 3.11, 3.12 | 3.8, 3.9, 3.10, 3.11, 3.12 | +| Python | 3.9, 3.10, 3.11, 3.12 | 3.8, 3.9, 3.10, 3.11, 3.12 | | Platform | AMD64/ARM64(\*) | AMD64/ARM64(\*) | | Kubernetes | 1.26, 1.27, 1.28, 1.29, 1.30 | 1.26, 1.27, 1.28, 1.29, 1.30 | | PostgreSQL | 12, 13, 14, 15, 16 | 12, 13, 14, 15, 16 | @@ -108,9 +108,7 @@ Apache Airflow is tested with: \* Experimental -**Note**: MySQL 5.x versions are unable to or have limitations with -running multiple schedulers -- please see the [Scheduler docs](https://airflow.apache.org/docs/apache-airflow/stable/administration-and-deployment/scheduler.html). -MariaDB is not tested/recommended. +**Note**: MariaDB is not tested/recommended. **Note**: SQLite is used in Airflow tests. Do not use it in production. We recommend using the latest stable version of SQLite for local development. @@ -177,15 +175,15 @@ them to the appropriate format and workflow that your tool requires. ```bash -pip install 'apache-airflow==2.10.3' \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.10.3/constraints-3.8.txt" +pip install 'apache-airflow==2.10.4' \ + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.10.4/constraints-3.8.txt" ``` 2. 
Installing with extras (i.e., postgres, google) ```bash pip install 'apache-airflow[postgres,google]==2.8.3' \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.10.3/constraints-3.8.txt" + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.10.4/constraints-3.8.txt" ``` For information on installing provider packages, check @@ -290,7 +288,7 @@ Apache Airflow version life cycle: | Version | Current Patch/Minor | State | First Release | Limited Support | EOL/Terminated | |-----------|-----------------------|-----------|-----------------|-------------------|------------------| -| 2 | 2.10.3 | Supported | Dec 17, 2020 | TBD | TBD | +| 2 | 2.10.4 | Supported | Dec 17, 2020 | TBD | TBD | | 1.10 | 1.10.15 | EOL | Aug 27, 2018 | Dec 17, 2020 | June 17, 2021 | | 1.9 | 1.9.0 | EOL | Jan 03, 2018 | Aug 27, 2018 | Aug 27, 2018 | | 1.8 | 1.8.2 | EOL | Mar 19, 2017 | Jan 03, 2018 | Jan 03, 2018 | diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 0d27630b70800..146c661c84ffa 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -21,13 +21,65 @@ .. towncrier release notes start +Airflow 2.10.4 (2024-12-09) +--------------------------- + +Significant Changes +^^^^^^^^^^^^^^^^^^^ + +TaskInstance ``priority_weight`` is capped in 32-bit signed integer ranges (#43611) +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Some database engines are limited to 32-bit integer values. As some users reported errors in +weight rolled-over to negative values, we decided to cap the value to the 32-bit integer. Even +if internally in python smaller or larger values to 64 bit are supported, ``priority_weight`` is +capped and only storing values from -2147483648 to 2147483647. + +Bug Fixes +^^^^^^^^^ + +- Fix stats of dynamic mapped tasks after automatic retries of failed tasks (#44300) +- Fix wrong display of multi-line messages in the log after filtering (#44457) +- Allow "/" in metrics validator (#42934) (#44515) +- Fix gantt flickering (#44488) (#44517) +- Fix problem with inability to remove fields from Connection form (#40421) (#44442) +- Check pool_slots on partial task import instead of execution (#39724) (#42693) +- Avoid grouping task instance stats by try_number for dynamic mapped tasks (#44300) (#44319) +- Re-queue task when they are stuck in queued (#43520) (#44158) +- Suppress the warnings where we check for sensitive values (#44148) (#44167) +- Fix get_task_instance_try_details to return appropriate schema (#43830) (#44133) +- Log message source details are grouped (#43681) (#44070) +- Fix duplication of Task tries in the UI (#43891) (#43950) +- Add correct mime-type in OpenAPI spec (#43879) (#43901) +- Disable extra links button if link is null or empty (#43844) (#43851) +- Disable XCom list ordering by execution_date (#43680) (#43696) +- Fix venv numpy example which needs to be 1.26 at least to be working in Python 3.12 (#43659) +- Fix Try Selector in Mapped Tasks also on Index 0 (#43590) (#43591) +- Prevent using ``trigger_rule="always"`` in a dynamic mapped task (#43810) +- Prevent using ``trigger_rule=TriggerRule.ALWAYS`` in a task-generated mapping within bare tasks (#44751) + +Doc Only Changes +"""""""""""""""" +- Update XCom docs around containers/helm (#44570) (#44573) + +Miscellaneous +""""""""""""" +- Raise deprecation warning when accessing inlet or outlet events through str (#43922) + + Airflow 2.10.3 (2024-11-04) --------------------------- Significant Changes ^^^^^^^^^^^^^^^^^^^ -No significant 
changes. +Enhancing BashOperator to Execute Templated Bash Scripts as Temporary Files (44641) +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Bash script files (``.sh`` and ``.bash``) with Jinja templating enabled (without the space after the file +extension) are now rendered into a temporary file, and then executed. Instead of being directly executed +as inline command. + Bug Fixes """"""""" @@ -62,6 +114,7 @@ Bug Fixes - Ensure total_entries in /api/v1/dags (#43377) (#43429) - Include limit and offset in request body schema for List task instances (batch) endpoint (#43479) - Don't raise a warning in ExecutorSafeguard when execute is called from an extended operator (#42849) (#43577) +- Double-check TaskInstance state if it differs from the Executor state.(#43063) Miscellaneous """"""""""""" diff --git a/airflow/__init__.py b/airflow/__init__.py index 1e04b0048bb3b..818bec887bf71 100644 --- a/airflow/__init__.py +++ b/airflow/__init__.py @@ -17,7 +17,7 @@ # under the License. from __future__ import annotations -__version__ = "2.10.3" +__version__ = "2.10.4" import os import sys diff --git a/airflow/api_connexion/endpoints/task_instance_endpoint.py b/airflow/api_connexion/endpoints/task_instance_endpoint.py index a79af61f69bed..2eb63260e348e 100644 --- a/airflow/api_connexion/endpoints/task_instance_endpoint.py +++ b/airflow/api_connexion/endpoints/task_instance_endpoint.py @@ -840,7 +840,12 @@ def _query(orm_object): ) return query - task_instances = session.scalars(_query(TIH)).all() + session.scalars(_query(TI)).all() + # Exclude TaskInstance with state UP_FOR_RETRY since they have been recorded in TaskInstanceHistory + tis = session.scalars( + _query(TI).where(or_(TI.state != TaskInstanceState.UP_FOR_RETRY, TI.state.is_(None))) + ).all() + + task_instances = session.scalars(_query(TIH)).all() + tis return task_instance_history_collection_schema.dump( TaskInstanceHistoryCollection(task_instances=task_instances, total_entries=len(task_instances)) ) diff --git a/airflow/api_connexion/openapi/v1.yaml b/airflow/api_connexion/openapi/v1.yaml index 74ef2121c3780..55a0b60c74d69 100644 --- a/airflow/api_connexion/openapi/v1.yaml +++ b/airflow/api_connexion/openapi/v1.yaml @@ -231,7 +231,7 @@ info: This means that the server encountered an unexpected condition that prevented it from fulfilling the request. 
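The `task_instance_endpoint.py` change above merges the current `TaskInstance` rows into the `TaskInstanceHistory` results, skipping instances in `UP_FOR_RETRY` because their latest try is already recorded in the history table. A small illustrative sketch of that merge (a hypothetical helper, not the endpoint code, which queries both tables via SQLAlchemy):

```python
def merge_task_instance_tries(history_rows: list[dict], current_rows: list[dict]) -> list[dict]:
    """History rows first, then current task instances that are not UP_FOR_RETRY."""
    not_retrying = [ti for ti in current_rows if ti.get("state") != "up_for_retry"]
    return list(history_rows) + not_retrying
```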
- version: "2.10.3" + version: "2.10.4" license: name: Apache 2.0 url: http://www.apache.org/licenses/LICENSE-2.0.html @@ -1743,7 +1743,7 @@ paths: content: application/json: schema: - $ref: "#/components/schemas/TaskInstance" + $ref: "#/components/schemas/TaskInstanceHistory" "401": $ref: "#/components/responses/Unauthenticated" "403": @@ -1774,7 +1774,7 @@ paths: content: application/json: schema: - $ref: "#/components/schemas/TaskInstanceCollection" + $ref: "#/components/schemas/TaskInstanceHistoryCollection" "401": $ref: "#/components/responses/Unauthenticated" "403": @@ -1806,7 +1806,7 @@ paths: content: application/json: schema: - $ref: "#/components/schemas/TaskInstanceCollection" + $ref: "#/components/schemas/TaskInstanceHistoryCollection" "401": $ref: "#/components/responses/Unauthenticated" "403": @@ -1836,7 +1836,7 @@ paths: content: application/json: schema: - $ref: "#/components/schemas/TaskInstance" + $ref: "#/components/schemas/TaskInstanceHistory" "401": $ref: "#/components/responses/Unauthenticated" "403": @@ -2274,7 +2274,7 @@ paths: properties: content: type: string - plain/text: + text/plain: schema: type: string @@ -4021,7 +4021,95 @@ components: items: $ref: "#/components/schemas/TaskInstance" - $ref: "#/components/schemas/CollectionInfo" + TaskInstanceHistory: + type: object + properties: + task_id: + type: string + task_display_name: + type: string + description: | + Human centric display text for the task. + + *New in version 2.9.0* + dag_id: + type: string + dag_run_id: + type: string + description: | + The DagRun ID for this task instance + + *New in version 2.3.0* + start_date: + type: string + format: datetime + nullable: true + end_date: + type: string + format: datetime + nullable: true + duration: + type: number + nullable: true + state: + $ref: "#/components/schemas/TaskState" + try_number: + type: integer + map_index: + type: integer + max_tries: + type: integer + hostname: + type: string + unixname: + type: string + pool: + type: string + pool_slots: + type: integer + queue: + type: string + nullable: true + priority_weight: + type: integer + nullable: true + operator: + type: string + nullable: true + description: | + *Changed in version 2.1.1*: Field becomes nullable. + queued_when: + type: string + nullable: true + description: | + The datetime that the task enter the state QUEUE, also known as queue_at + pid: + type: integer + nullable: true + executor: + type: string + nullable: true + description: | + Executor the task is configured to run on or None (which indicates the default executor) + + *New in version 2.10.0* + executor_config: + type: string + + TaskInstanceHistoryCollection: + type: object + description: | + Collection of task instances . + *Changed in version 2.1.0*: 'total_entries' field is added. + allOf: + - type: object + properties: + task_instances_history: + type: array + items: + $ref: "#/components/schemas/TaskInstanceHistory" + - $ref: "#/components/schemas/CollectionInfo" TaskInstanceReference: type: object properties: diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index 00d7cacc7f47a..613c5e3394a40 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -494,7 +494,7 @@ core: description: | Dataset URI validation should raise an exception if it is not compliant with AIP-60. By default this configuration is false, meaning that Airflow 2.x only warns the user. - In Airflow 3, this configuration will be enabled by default. 
+ In Airflow 3, this configuration will be removed, unconditionally enabling strict validation. default: "False" example: ~ version_added: 2.9.2 diff --git a/airflow/configuration.py b/airflow/configuration.py index 81eb0fc725344..afb4b5f3808b6 100644 --- a/airflow/configuration.py +++ b/airflow/configuration.py @@ -856,7 +856,8 @@ def mask_secrets(self): for section, key in self.sensitive_config_values: try: - value = self.get(section, key, suppress_warnings=True) + with self.suppress_future_warnings(): + value = self.get(section, key, suppress_warnings=True) except AirflowConfigException: log.debug( "Could not retrieve value from section %s, for key %s. Skipping redaction of this conf.", diff --git a/airflow/decorators/base.py b/airflow/decorators/base.py index d743acbe50b2b..c0d46df67f188 100644 --- a/airflow/decorators/base.py +++ b/airflow/decorators/base.py @@ -403,6 +403,12 @@ def _validate_arg_names(self, func: ValidationSource, kwargs: dict[str, Any]): super()._validate_arg_names(func, kwargs) def expand(self, **map_kwargs: OperatorExpandArgument) -> XComArg: + if self.kwargs.get("trigger_rule") == TriggerRule.ALWAYS and any( + [isinstance(expanded, XComArg) for expanded in map_kwargs.values()] + ): + raise ValueError( + "Task-generated mapping within a task using 'expand' is not allowed with trigger rule 'always'." + ) if not map_kwargs: raise TypeError("no arguments to expand against") self._validate_arg_names("expand", map_kwargs) @@ -416,6 +422,21 @@ def expand(self, **map_kwargs: OperatorExpandArgument) -> XComArg: return self._expand(DictOfListsExpandInput(map_kwargs), strict=False) def expand_kwargs(self, kwargs: OperatorExpandKwargsArgument, *, strict: bool = True) -> XComArg: + if ( + self.kwargs.get("trigger_rule") == TriggerRule.ALWAYS + and not isinstance(kwargs, XComArg) + and any( + [ + isinstance(v, XComArg) + for kwarg in kwargs + if not isinstance(kwarg, XComArg) + for v in kwarg.values() + ] + ) + ): + raise ValueError( + "Task-generated mapping within a task using 'expand_kwargs' is not allowed with trigger rule 'always'." + ) if isinstance(kwargs, Sequence): for item in kwargs: if not isinstance(item, (XComArg, Mapping)): @@ -457,6 +478,12 @@ def _expand(self, expand_input: ExpandInput, *, strict: bool) -> XComArg: end_date = timezone.convert_to_utc(partial_kwargs.pop("end_date", None)) if partial_kwargs.get("pool") is None: partial_kwargs["pool"] = Pool.DEFAULT_POOL_NAME + if "pool_slots" in partial_kwargs: + if partial_kwargs["pool_slots"] < 1: + dag_str = "" + if dag: + dag_str = f" in dag {dag.dag_id}" + raise ValueError(f"pool slots for {task_id}{dag_str} cannot be less than 1") partial_kwargs["retries"] = parse_retries(partial_kwargs.get("retries", DEFAULT_RETRIES)) partial_kwargs["retry_delay"] = coerce_timedelta( partial_kwargs.get("retry_delay", DEFAULT_RETRY_DELAY), diff --git a/airflow/example_dags/example_branch_operator_decorator.py b/airflow/example_dags/example_branch_operator_decorator.py index 59cb3b2919475..66cea26f391e2 100644 --- a/airflow/example_dags/example_branch_operator_decorator.py +++ b/airflow/example_dags/example_branch_operator_decorator.py @@ -117,7 +117,7 @@ def some_ext_py_task(): # Run the example a second time and see that it re-uses it and is faster. 
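To illustrate the guard added to `expand()` / `expand_kwargs()` above: combining `trigger_rule="always"` with a task-generated (XComArg) mapping input now raises `ValueError` at DAG-definition time, while mapping over literal values stays allowed. A minimal sketch under assumed DAG and task names:

```python
import datetime

from airflow.decorators import dag, task


@dag(schedule=None, start_date=datetime.datetime(2024, 1, 1), catchup=False)
def mapping_trigger_rule_example():
    @task
    def make_values():
        return [1, 2, 3]

    @task(trigger_rule="always")
    def consume(value):
        print(value)

    # With the guard above, mapping over a task-generated value (an XComArg) while
    # using trigger_rule="always" raises ValueError when the DAG is parsed:
    # consume.expand(value=make_values())
    consume.expand(value=[1, 2, 3])  # mapping over a literal list remains allowed


mapping_trigger_rule_example()
```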
VENV_CACHE_PATH = tempfile.gettempdir() - @task.branch_virtualenv(requirements=["numpy~=1.24.4"], venv_cache_path=VENV_CACHE_PATH) + @task.branch_virtualenv(requirements=["numpy~=1.26.0"], venv_cache_path=VENV_CACHE_PATH) def branching_virtualenv(choices) -> str: import random @@ -137,7 +137,7 @@ def branching_virtualenv(choices) -> str: for option in options: @task.virtualenv( - task_id=f"venv_{option}", requirements=["numpy~=1.24.4"], venv_cache_path=VENV_CACHE_PATH + task_id=f"venv_{option}", requirements=["numpy~=1.26.0"], venv_cache_path=VENV_CACHE_PATH ) def some_venv_task(): import numpy as np diff --git a/airflow/example_dags/example_dataset_alias.py b/airflow/example_dags/example_dataset_alias.py index c50a89e34fb8c..4bfc6f51a7351 100644 --- a/airflow/example_dags/example_dataset_alias.py +++ b/airflow/example_dags/example_dataset_alias.py @@ -67,7 +67,7 @@ def produce_dataset_events(): def produce_dataset_events_through_dataset_alias(*, outlet_events=None): bucket_name = "bucket" object_path = "my-task" - outlet_events["example-alias"].add(Dataset(f"s3://{bucket_name}/{object_path}")) + outlet_events[DatasetAlias("example-alias")].add(Dataset(f"s3://{bucket_name}/{object_path}")) produce_dataset_events_through_dataset_alias() diff --git a/airflow/example_dags/example_dataset_alias_with_no_taskflow.py b/airflow/example_dags/example_dataset_alias_with_no_taskflow.py index 7d7227af39f50..72863618e3949 100644 --- a/airflow/example_dags/example_dataset_alias_with_no_taskflow.py +++ b/airflow/example_dags/example_dataset_alias_with_no_taskflow.py @@ -68,7 +68,9 @@ def produce_dataset_events(): def produce_dataset_events_through_dataset_alias_with_no_taskflow(*, outlet_events=None): bucket_name = "bucket" object_path = "my-task" - outlet_events["example-alias-no-taskflow"].add(Dataset(f"s3://{bucket_name}/{object_path}")) + outlet_events[DatasetAlias("example-alias-no-taskflow")].add( + Dataset(f"s3://{bucket_name}/{object_path}") + ) PythonOperator( task_id="produce_dataset_events_through_dataset_alias_with_no_taskflow", diff --git a/airflow/example_dags/example_inlet_event_extra.py b/airflow/example_dags/example_inlet_event_extra.py index 4b7567fc2f87e..b07faf2bdfe0b 100644 --- a/airflow/example_dags/example_inlet_event_extra.py +++ b/airflow/example_dags/example_inlet_event_extra.py @@ -57,5 +57,5 @@ def read_dataset_event(*, inlet_events=None): BashOperator( task_id="read_dataset_event_from_classic", inlets=[ds], - bash_command="echo '{{ inlet_events['s3://output/1.txt'][-1].extra | tojson }}'", + bash_command="echo '{{ inlet_events[Dataset('s3://output/1.txt')][-1].extra | tojson }}'", ) diff --git a/airflow/executors/base_executor.py b/airflow/executors/base_executor.py index 57568af199710..5a5cf2d73f15d 100644 --- a/airflow/executors/base_executor.py +++ b/airflow/executors/base_executor.py @@ -26,6 +26,7 @@ from typing import TYPE_CHECKING, Any, List, Optional, Sequence, Tuple import pendulum +from deprecated import deprecated from airflow.cli.cli_config import DefaultHelpParser from airflow.configuration import conf @@ -545,7 +546,12 @@ def terminate(self): """Get called when the daemon receives a SIGTERM.""" raise NotImplementedError() - def cleanup_stuck_queued_tasks(self, tis: list[TaskInstance]) -> list[str]: # pragma: no cover + @deprecated( + reason="Replaced by function `revoke_task`.", + category=RemovedInAirflow3Warning, + action="ignore", + ) + def cleanup_stuck_queued_tasks(self, tis: list[TaskInstance]) -> list[str]: """ Handle remnants of tasks that were failed 
because they were stuck in queued. @@ -556,7 +562,23 @@ def cleanup_stuck_queued_tasks(self, tis: list[TaskInstance]) -> list[str]: # p :param tis: List of Task Instances to clean up :return: List of readable task instances for a warning message """ - raise NotImplementedError() + raise NotImplementedError + + def revoke_task(self, *, ti: TaskInstance): + """ + Attempt to remove task from executor. + + It should attempt to ensure that the task is no longer running on the worker, + and ensure that it is cleared out from internal data structures. + + It should *not* change the state of the task in airflow, or add any events + to the event buffer. + + It should not raise any error. + + :param ti: Task instance to remove + """ + raise NotImplementedError def try_adopt_task_instances(self, tis: Sequence[TaskInstance]) -> Sequence[TaskInstance]: """ diff --git a/airflow/jobs/backfill_job_runner.py b/airflow/jobs/backfill_job_runner.py index 961c4b7e020b3..305eaff84be7d 100644 --- a/airflow/jobs/backfill_job_runner.py +++ b/airflow/jobs/backfill_job_runner.py @@ -309,6 +309,17 @@ def _manage_executor_state( self.log.debug("Executor state: %s task %s", state, ti) + if ( + state in (TaskInstanceState.FAILED, TaskInstanceState.SUCCESS) + and ti.state in self.STATES_COUNT_AS_RUNNING + ): + self.log.debug( + "In-memory TaskInstance state %s does not agree with executor state %s. Attempting to resolve by refreshing in-memory task instance from DB.", + ti, + state, + ) + ti.refresh_from_db(session=session) + if ( state in (TaskInstanceState.FAILED, TaskInstanceState.SUCCESS) and ti.state in self.STATES_COUNT_AS_RUNNING diff --git a/airflow/jobs/scheduler_job_runner.py b/airflow/jobs/scheduler_job_runner.py index aa4e8d4f26aea..c9afd40f719ed 100644 --- a/airflow/jobs/scheduler_job_runner.py +++ b/airflow/jobs/scheduler_job_runner.py @@ -25,12 +25,14 @@ import time import warnings from collections import Counter, defaultdict, deque +from contextlib import suppress from dataclasses import dataclass from datetime import timedelta from functools import lru_cache, partial from pathlib import Path from typing import TYPE_CHECKING, Any, Callable, Collection, Iterable, Iterator +from deprecated import deprecated from sqlalchemy import and_, delete, func, not_, or_, select, text, update from sqlalchemy.exc import OperationalError from sqlalchemy.orm import lazyload, load_only, make_transient, selectinload @@ -97,6 +99,9 @@ DR = DagRun DM = DagModel +TASK_STUCK_IN_QUEUED_RESCHEDULE_EVENT = "stuck in queued reschedule" +""":meta private:""" + @dataclass class ConcurrencyMap: @@ -228,6 +233,13 @@ def __init__( stalled_task_timeout, task_adoption_timeout, worker_pods_pending_timeout, task_queued_timeout ) + # this param is intentionally undocumented + self._num_stuck_queued_retries = conf.getint( + section="scheduler", + key="num_stuck_in_queued_retries", + fallback=2, + ) + self.do_pickle = do_pickle if log: @@ -1093,7 +1105,7 @@ def _run_scheduler_loop(self) -> None: timers.call_regular_interval( conf.getfloat("scheduler", "task_queued_timeout_check_interval"), - self._fail_tasks_stuck_in_queued, + self._handle_tasks_stuck_in_queued, ) timers.call_regular_interval( @@ -1141,6 +1153,7 @@ def _run_scheduler_loop(self) -> None: for executor in self.job.executors: try: # this is backcompat check if executor does not inherit from BaseExecutor + # todo: remove in airflow 3.0 if not hasattr(executor, "_task_event_logs"): continue with create_session() as session: @@ -1772,48 +1785,132 @@ def 
_send_sla_callbacks_to_processor(self, dag: DAG) -> None: self.job.executor.send_callback(request) @provide_session - def _fail_tasks_stuck_in_queued(self, session: Session = NEW_SESSION) -> None: + def _handle_tasks_stuck_in_queued(self, session: Session = NEW_SESSION) -> None: """ - Mark tasks stuck in queued for longer than `task_queued_timeout` as failed. + Handle the scenario where a task is queued for longer than `task_queued_timeout`. Tasks can get stuck in queued for a wide variety of reasons (e.g. celery loses track of a task, a cluster can't further scale up its workers, etc.), but tasks - should not be stuck in queued for a long time. This will mark tasks stuck in - queued for longer than `self._task_queued_timeout` as failed. If the task has - available retries, it will be retried. + should not be stuck in queued for a long time. + + We will attempt to requeue the task (by revoking it from executor and setting to + scheduled) up to 2 times before failing the task. """ - self.log.debug("Calling SchedulerJob._fail_tasks_stuck_in_queued method") + tasks_stuck_in_queued = self._get_tis_stuck_in_queued(session) + for executor, stuck_tis in self._executor_to_tis(tasks_stuck_in_queued).items(): + try: + for ti in stuck_tis: + executor.revoke_task(ti=ti) + self._maybe_requeue_stuck_ti( + ti=ti, + session=session, + ) + except NotImplementedError: + # this block only gets entered if the executor has not implemented `revoke_task`. + # in which case, we try the fallback logic + # todo: remove the call to _stuck_in_queued_backcompat_logic in airflow 3.0. + # after 3.0, `cleanup_stuck_queued_tasks` will be removed, so we should + # just continue immediately. + self._stuck_in_queued_backcompat_logic(executor, stuck_tis) + continue - tasks_stuck_in_queued = session.scalars( + def _get_tis_stuck_in_queued(self, session) -> Iterable[TaskInstance]: + """Query db for TIs that are stuck in queued.""" + return session.scalars( select(TI).where( TI.state == TaskInstanceState.QUEUED, TI.queued_dttm < (timezone.utcnow() - timedelta(seconds=self._task_queued_timeout)), TI.queued_by_job_id == self.job.id, ) - ).all() + ) - for executor, stuck_tis in self._executor_to_tis(tasks_stuck_in_queued).items(): - try: - cleaned_up_task_instances = set(executor.cleanup_stuck_queued_tasks(tis=stuck_tis)) - for ti in stuck_tis: - if repr(ti) in cleaned_up_task_instances: - self.log.warning( - "Marking task instance %s stuck in queued as failed. " - "If the task instance has available retries, it will be retried.", - ti, - ) - session.add( - Log( - event="stuck in queued", - task_instance=ti.key, - extra=( - "Task will be marked as failed. If the task instance has " - "available retries, it will be retried." - ), - ) - ) - except NotImplementedError: - self.log.debug("Executor doesn't support cleanup of stuck queued tasks. Skipping.") + def _maybe_requeue_stuck_ti(self, *, ti, session): + """ + Requeue task if it has not been attempted too many times. + + Otherwise, fail it. + """ + num_times_stuck = self._get_num_times_stuck_in_queued(ti, session) + if num_times_stuck < self._num_stuck_queued_retries: + self.log.info("Task stuck in queued; will try to requeue. task_id=%s", ti.task_id) + session.add( + Log( + event=TASK_STUCK_IN_QUEUED_RESCHEDULE_EVENT, + task_instance=ti.key, + extra=( + f"Task was in queued state for longer than {self._task_queued_timeout} " + "seconds; task state will be set back to scheduled." 
+ ), + ) + ) + self._reschedule_stuck_task(ti) + else: + self.log.info( + "Task requeue attempts exceeded max; marking failed. task_instance=%s", + ti, + ) + session.add( + Log( + event="stuck in queued tries exceeded", + task_instance=ti.key, + extra=f"Task was requeued more than {self._num_stuck_queued_retries} times and will be failed.", + ) + ) + ti.set_state(TaskInstanceState.FAILED, session=session) + + @deprecated( + reason="This is backcompat layer for older executor interface. Should be removed in 3.0", + category=RemovedInAirflow3Warning, + action="ignore", + ) + def _stuck_in_queued_backcompat_logic(self, executor, stuck_tis): + """ + Try to invoke stuck in queued cleanup for older executor interface. + + TODO: remove in airflow 3.0 + + Here we handle case where the executor pre-dates the interface change that + introduced `cleanup_tasks_stuck_in_queued` and deprecated `cleanup_stuck_queued_tasks`. + + """ + with suppress(NotImplementedError): + for ti_repr in executor.cleanup_stuck_queued_tasks(tis=stuck_tis): + self.log.warning( + "Task instance %s stuck in queued. Will be set to failed.", + ti_repr, + ) + + @provide_session + def _reschedule_stuck_task(self, ti, session=NEW_SESSION): + session.execute( + update(TI) + .where(TI.filter_for_tis([ti])) + .values( + state=TaskInstanceState.SCHEDULED, + queued_dttm=None, + ) + .execution_options(synchronize_session=False) + ) + + @provide_session + def _get_num_times_stuck_in_queued(self, ti: TaskInstance, session: Session = NEW_SESSION) -> int: + """ + Check the Log table to see how many times a taskinstance has been stuck in queued. + + We can then use this information to determine whether to reschedule a task or fail it. + """ + return ( + session.query(Log) + .where( + Log.task_id == ti.task_id, + Log.dag_id == ti.dag_id, + Log.run_id == ti.run_id, + Log.map_index == ti.map_index, + Log.try_number == ti.try_number, + Log.event == TASK_STUCK_IN_QUEUED_RESCHEDULE_EVENT, + ) + .count() + ) @provide_session def _emit_pool_metrics(self, session: Session = NEW_SESSION) -> None: @@ -2102,7 +2199,7 @@ def _orphan_unreferenced_datasets(self, session: Session = NEW_SESSION) -> None: updated_count = sum(self._set_orphaned(dataset) for dataset in orphaned_dataset_query) Stats.gauge("dataset.orphaned", updated_count) - def _executor_to_tis(self, tis: list[TaskInstance]) -> dict[BaseExecutor, list[TaskInstance]]: + def _executor_to_tis(self, tis: Iterable[TaskInstance]) -> dict[BaseExecutor, list[TaskInstance]]: """Organize TIs into lists per their respective executor.""" _executor_to_tis: defaultdict[BaseExecutor, list[TaskInstance]] = defaultdict(list) for ti in tis: diff --git a/airflow/metrics/validators.py b/airflow/metrics/validators.py index 111ad9b87df62..d69e57762c23a 100644 --- a/airflow/metrics/validators.py +++ b/airflow/metrics/validators.py @@ -44,7 +44,7 @@ class MetricNameLengthExemptionWarning(Warning): # Only characters in the character set are considered valid # for the stat_name if stat_name_default_handler is used. -ALLOWED_CHARACTERS = frozenset(string.ascii_letters + string.digits + "_.-") +ALLOWED_CHARACTERS = frozenset(string.ascii_letters + string.digits + "_.-/") # The following set contains existing metrics whose names are too long for # OpenTelemetry and should be deprecated over time. 
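A compact sketch of the new stuck-in-queued handling described above: the scheduler revokes the task from its executor, re-queues it up to `scheduler.num_stuck_in_queued_retries` times (default 2, counted via `Log` rows with the "stuck in queued reschedule" event), and only then fails it. The helper and fake executor below are illustrative, not the scheduler code itself; only the keyword-only `revoke_task(*, ti)` signature mirrors the new `BaseExecutor` hook:

```python
NUM_STUCK_QUEUED_RETRIES = 2  # scheduler.num_stuck_in_queued_retries (intentionally undocumented)


def handle_stuck_in_queued(ti, executor, num_times_stuck: int) -> str:
    """Decide whether a task instance stuck in queued is rescheduled or failed."""
    executor.revoke_task(ti=ti)  # remove from the executor without touching Airflow state
    if num_times_stuck < NUM_STUCK_QUEUED_RETRIES:
        # real code sets the task instance back to SCHEDULED and clears queued_dttm
        return "rescheduled"
    # retries exhausted: real code sets the task instance to FAILED
    return "failed"


class _FakeExecutor:
    def revoke_task(self, *, ti):  # same keyword-only signature as the new BaseExecutor hook
        print(f"revoked {ti}")


print(handle_stuck_in_queued("ti", _FakeExecutor(), num_times_stuck=0))  # -> rescheduled
print(handle_stuck_in_queued("ti", _FakeExecutor(), num_times_stuck=2))  # -> failed
```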
This is implemented to diff --git a/airflow/models/abstractoperator.py b/airflow/models/abstractoperator.py index 45eb3c5fff189..ec3d1f5309adb 100644 --- a/airflow/models/abstractoperator.py +++ b/airflow/models/abstractoperator.py @@ -40,7 +40,7 @@ from airflow.utils.task_group import MappedTaskGroup from airflow.utils.trigger_rule import TriggerRule from airflow.utils.types import NOTSET, ArgNotSet -from airflow.utils.weight_rule import WeightRule +from airflow.utils.weight_rule import WeightRule, db_safe_priority TaskStateChangeCallback = Callable[[Context], None] @@ -467,7 +467,7 @@ def priority_weight_total(self) -> int: ) if isinstance(self.weight_rule, _AbsolutePriorityWeightStrategy): - return self.priority_weight + return db_safe_priority(self.priority_weight) elif isinstance(self.weight_rule, _DownstreamPriorityWeightStrategy): upstream = False elif isinstance(self.weight_rule, _UpstreamPriorityWeightStrategy): @@ -476,10 +476,13 @@ def priority_weight_total(self) -> int: upstream = False dag = self.get_dag() if dag is None: - return self.priority_weight - return self.priority_weight + sum( - dag.task_dict[task_id].priority_weight - for task_id in self.get_flat_relative_ids(upstream=upstream) + return db_safe_priority(self.priority_weight) + return db_safe_priority( + self.priority_weight + + sum( + dag.task_dict[task_id].priority_weight + for task_id in self.get_flat_relative_ids(upstream=upstream) + ) ) @cached_property diff --git a/airflow/models/baseoperator.py b/airflow/models/baseoperator.py index 449678860f80b..773552184f103 100644 --- a/airflow/models/baseoperator.py +++ b/airflow/models/baseoperator.py @@ -365,6 +365,11 @@ def partial( partial_kwargs["end_date"] = timezone.convert_to_utc(partial_kwargs["end_date"]) if partial_kwargs["pool"] is None: partial_kwargs["pool"] = Pool.DEFAULT_POOL_NAME + if partial_kwargs["pool_slots"] < 1: + dag_str = "" + if dag: + dag_str = f" in dag {dag.dag_id}" + raise ValueError(f"pool slots for {task_id}{dag_str} cannot be less than 1") partial_kwargs["retries"] = parse_retries(partial_kwargs["retries"]) partial_kwargs["retry_delay"] = coerce_timedelta(partial_kwargs["retry_delay"], key="retry_delay") if partial_kwargs["max_retry_delay"] is not None: @@ -656,6 +661,8 @@ class derived from this one results in the creation of a task object, This allows the executor to trigger higher priority tasks before others when things get backed up. Set priority_weight as a higher number for more important tasks. + As not all database engines support 64-bit integers, values are capped with 32-bit. + Valid range is from -2,147,483,648 to 2,147,483,647. :param weight_rule: weighting method used for the effective total priority weight of the task. Options are: ``{ downstream | upstream | absolute }`` default is ``downstream`` @@ -677,7 +684,8 @@ class derived from this one results in the creation of a task object, Additionally, when set to ``absolute``, there is bonus effect of significantly speeding up the task creation process as for very large DAGs. Options can be set as string or using the constants defined in - the static class ``airflow.utils.WeightRule`` + the static class ``airflow.utils.WeightRule``. + Irrespective of the weight rule, resulting priority values are capped with 32-bit. 
|experimental| Since 2.9.0, Airflow allows to define custom priority weight strategy, by creating a subclass of diff --git a/airflow/reproducible_build.yaml b/airflow/reproducible_build.yaml index 921cef1c89adf..253e4e793cff1 100644 --- a/airflow/reproducible_build.yaml +++ b/airflow/reproducible_build.yaml @@ -1,2 +1,2 @@ -release-notes-hash: 6aa54b840e9fc2e48cf7046507e6930b -source-date-epoch: 1730460817 +release-notes-hash: 0867869dba7304e7ead28dd0800c5c4b +source-date-epoch: 1733822937 diff --git a/airflow/utils/context.py b/airflow/utils/context.py index a72885401f7b2..9dddcc3f16cd8 100644 --- a/airflow/utils/context.py +++ b/airflow/utils/context.py @@ -177,6 +177,14 @@ class OutletEventAccessor: def add(self, dataset: Dataset | str, extra: dict[str, Any] | None = None) -> None: """Add a DatasetEvent to an existing Dataset.""" if isinstance(dataset, str): + warnings.warn( + ( + "Emitting dataset events using string is deprecated and will be removed in Airflow 3. " + "Please use the Dataset object (renamed as Asset in Airflow 3) directly" + ), + DeprecationWarning, + stacklevel=2, + ) dataset_uri = dataset elif isinstance(dataset, Dataset): dataset_uri = dataset.uri @@ -216,6 +224,16 @@ def __len__(self) -> int: return len(self._dict) def __getitem__(self, key: str | Dataset | DatasetAlias) -> OutletEventAccessor: + if isinstance(key, str): + warnings.warn( + ( + "Accessing outlet_events using string is deprecated and will be removed in Airflow 3. " + "Please use the Dataset or DatasetAlias object (renamed as Asset and AssetAlias in Airflow 3) directly" + ), + DeprecationWarning, + stacklevel=2, + ) + event_key = extract_event_key(key) if event_key not in self._dict: self._dict[event_key] = OutletEventAccessor(extra={}, raw_key=key) @@ -282,6 +300,15 @@ def __getitem__(self, key: int | str | Dataset | DatasetAlias) -> LazyDatasetEve join_clause = DatasetEvent.source_aliases where_clause = DatasetAliasModel.name == dataset_alias.name elif isinstance(obj, (Dataset, str)): + if isinstance(obj, str): + warnings.warn( + ( + "Accessing inlet_events using string is deprecated and will be removed in Airflow 3. 
" + "Please use the Dataset object (renamed as Asset in Airflow 3) directly" + ), + DeprecationWarning, + stacklevel=2, + ) dataset = self._datasets[extract_event_key(obj)] join_clause = DatasetEvent.dataset where_clause = DatasetModel.uri == dataset.uri diff --git a/airflow/utils/log/file_task_handler.py b/airflow/utils/log/file_task_handler.py index e99ffae0c94d8..9eb55c707f180 100644 --- a/airflow/utils/log/file_task_handler.py +++ b/airflow/utils/log/file_task_handler.py @@ -416,7 +416,11 @@ def _read( ) ) log_pos = len(logs) - messages = "".join([f"*** {x}\n" for x in messages_list]) + # Log message source details are grouped: they are not relevant for most users and can + # distract them from finding the root cause of their errors + messages = " INFO - ::group::Log message source details\n" + messages += "".join([f"*** {x}\n" for x in messages_list]) + messages += " INFO - ::endgroup::\n" end_of_log = ti.try_number != try_number or ti.state not in ( TaskInstanceState.RUNNING, TaskInstanceState.DEFERRED, diff --git a/airflow/utils/task_group.py b/airflow/utils/task_group.py index d1dd9822be222..2a4dadf5fd6ad 100644 --- a/airflow/utils/task_group.py +++ b/airflow/utils/task_group.py @@ -37,6 +37,7 @@ from airflow.models.taskmixin import DAGNode from airflow.serialization.enums import DagAttributeTypes from airflow.utils.helpers import validate_group_key, validate_instance_args +from airflow.utils.trigger_rule import TriggerRule if TYPE_CHECKING: from sqlalchemy.orm import Session @@ -220,10 +221,15 @@ def parent_group(self) -> TaskGroup | None: def __iter__(self): for child in self.children.values(): - if isinstance(child, TaskGroup): - yield from child - else: - yield child + yield from self._iter_child(child) + + @staticmethod + def _iter_child(child): + """Iterate over the children of this TaskGroup.""" + if isinstance(child, TaskGroup): + yield from child + else: + yield child def add(self, task: DAGNode) -> DAGNode: """ @@ -599,6 +605,16 @@ def __init__(self, *, expand_input: ExpandInput, **kwargs: Any) -> None: super().__init__(**kwargs) self._expand_input = expand_input + def __iter__(self): + from airflow.models.abstractoperator import AbstractOperator + + for child in self.children.values(): + if isinstance(child, AbstractOperator) and child.trigger_rule == TriggerRule.ALWAYS: + raise ValueError( + "Task-generated mapping within a mapped task group is not allowed with trigger rule 'always'" + ) + yield from self._iter_child(child) + def iter_mapped_dependencies(self) -> Iterator[Operator]: """Upstream dependencies that provide XComs used by this mapped task group.""" from airflow.models.xcom_arg import XComArg diff --git a/airflow/utils/usage_data_collection.py b/airflow/utils/usage_data_collection.py index fe86a2da1cb50..3bdfb180fa912 100644 --- a/airflow/utils/usage_data_collection.py +++ b/airflow/utils/usage_data_collection.py @@ -25,6 +25,7 @@ from __future__ import annotations +import os import platform from urllib.parse import urlencode @@ -43,6 +44,10 @@ def usage_data_collection(): if _version_is_prerelease(airflow_version): return + # Exclude CI environments + if _is_ci_environ(): + return + scarf_domain = "https://apacheairflow.gateway.scarf.sh/scheduler" try: @@ -70,6 +75,26 @@ def _version_is_prerelease(version: str) -> bool: return parse(version).is_prerelease +def _is_ci_environ() -> bool: + """Return True if running in any known CI environment.""" + if os.getenv("CI") == "true": + # Generic CI variable set by many CI systems (GH Actions, Travis, GitLab, 
CircleCI, Jenkins, Heroku) + return True + + # Other CI variables set by specific CI systems + ci_env_vars = { + "CIRCLECI", # CircleCI + "CODEBUILD_BUILD_ID", # AWS CodeBuild + "GITHUB_ACTIONS", # GitHub Actions + "GITLAB_CI", # GitLab CI + "JENKINS_URL", # Jenkins + "TF_BUILD", # Azure Pipelines + "TRAVIS", # Travis CI + } + + return any(var in os.environ for var in ci_env_vars) + + def get_platform_info() -> tuple[str, str]: return platform.system(), platform.machine() diff --git a/airflow/utils/weight_rule.py b/airflow/utils/weight_rule.py index a63358b0322ce..490bcfbe88843 100644 --- a/airflow/utils/weight_rule.py +++ b/airflow/utils/weight_rule.py @@ -21,6 +21,18 @@ import methodtools +# Databases do not support arbitrary precision integers, so we need to limit the range of priority weights. +# postgres: -2147483648 to +2147483647 (see https://www.postgresql.org/docs/current/datatype-numeric.html) +# mysql: -2147483648 to +2147483647 (see https://dev.mysql.com/doc/refman/8.4/en/integer-types.html) +# sqlite: -9223372036854775808 to +9223372036854775807 (see https://sqlite.org/datatype3.html) +DB_SAFE_MINIMUM = -2147483648 +DB_SAFE_MAXIMUM = 2147483647 + + +def db_safe_priority(priority_weight: int) -> int: + """Convert priority weight to a safe value for the database.""" + return max(DB_SAFE_MINIMUM, min(DB_SAFE_MAXIMUM, priority_weight)) + class WeightRule(str, Enum): """Weight rules.""" diff --git a/airflow/www/static/js/api/useTIHistory.ts b/airflow/www/static/js/api/useTIHistory.ts index d90ce91f030db..1d1ee1d40f586 100644 --- a/airflow/www/static/js/api/useTIHistory.ts +++ b/airflow/www/static/js/api/useTIHistory.ts @@ -48,7 +48,7 @@ export default function useTIHistory({ .replace("_DAG_RUN_ID_", dagRunId) .replace("_TASK_ID_", taskId); - if (mapIndex && mapIndex > -1) { + if (mapIndex !== undefined && mapIndex > -1) { tiHistoryUrl = tiHistoryUrl.replace("/tries", `/${mapIndex}/tries`); } diff --git a/airflow/www/static/js/dag/details/gantt/index.tsx b/airflow/www/static/js/dag/details/gantt/index.tsx index 45c10d2b525e5..1ed5c353debba 100644 --- a/airflow/www/static/js/dag/details/gantt/index.tsx +++ b/airflow/www/static/js/dag/details/gantt/index.tsx @@ -144,20 +144,10 @@ const Gantt = ({ // Reset state when the dagrun changes useEffect(() => { - if (startDate !== dagRun?.queuedAt && startDate !== dagRun?.startDate) { - setStartDate(dagRun?.queuedAt || dagRun?.startDate); - } - if (!endDate || endDate !== dagRun?.endDate) { - // @ts-ignore - setEndDate(dagRun?.endDate ?? moment().add(1, "s").toString()); - } - }, [ - dagRun?.queuedAt, - dagRun?.startDate, - dagRun?.endDate, - startDate, - endDate, - ]); + setStartDate(dagRun?.queuedAt || dagRun?.startDate); + // @ts-ignore + setEndDate(dagRun?.endDate ?? 
moment().add(1, "s").toString()); + }, [dagRun?.queuedAt, dagRun?.startDate, dagRun?.endDate]); const numBars = Math.round(width / 100); const runDuration = getDuration(startDate, endDate); diff --git a/airflow/www/static/js/dag/details/taskInstance/ExtraLinks.tsx b/airflow/www/static/js/dag/details/taskInstance/ExtraLinks.tsx index 06528eab6e7a1..1eb59cb9b1a65 100644 --- a/airflow/www/static/js/dag/details/taskInstance/ExtraLinks.tsx +++ b/airflow/www/static/js/dag/details/taskInstance/ExtraLinks.tsx @@ -55,7 +55,7 @@ const ExtraLinks = ({ const isSanitised = (url: string | null) => { if (!url) { - return true; + return false; // Empty or null urls should cause the link to be disabled } const urlRegex = /^(https?:)/i; return urlRegex.test(url); diff --git a/airflow/www/static/js/dag/details/taskInstance/Logs/utils.test.tsx b/airflow/www/static/js/dag/details/taskInstance/Logs/utils.test.tsx index 8e577068d27e0..57cad4314f6ff 100644 --- a/airflow/www/static/js/dag/details/taskInstance/Logs/utils.test.tsx +++ b/airflow/www/static/js/dag/details/taskInstance/Logs/utils.test.tsx @@ -19,27 +19,60 @@ /* global describe, test, expect */ +import { AnsiUp } from "ansi_up"; import { LogLevel, parseLogs } from "./utils"; -const mockTaskLog = ` -5d28cfda3219 +const mockTaskLogInfoBegin = `5d28cfda3219 *** Reading local file: /root/airflow/logs/dag_id=test_ui_grid/run_id=scheduled__2022-06-03T00:00:00+00:00/task_id=section_1.get_entry_group/attempt=1.log [2022-06-04 00:00:01,901] {taskinstance.py:1132} INFO - Dependencies all met for [2022-06-04 00:00:01,906] {taskinstance.py:1132} INFO - Dependencies all met for [2022-06-04 00:00:01,906] {taskinstance.py:1329} INFO - --------------------------------------------------------------------------------- [2022-06-04 00:00:01,906] {taskinstance.py:1330} INFO - Starting attempt 1 of 1 [2022-06-04 00:00:01,906] {taskinstance.py:1331} INFO - --------------------------------------------------------------------------------- -[2022-06-04 00:00:01,916] {taskinstance.py:1350} INFO - Executing on 2022-06-03 00:00:00+00:00 +`; +const mockTaskLogErrorWithTraceback = `[2022-06-04 00:00:01,910] {taskinstance.py:3311} ERROR - Task failed with exception +Traceback (most recent call last): + File "/opt/airflow/airflow/models/taskinstance.py", line 767, in _execute_task + result = _execute_callable(context=context, **execute_callable_kwargs) + File "/opt/airflow/airflow/models/taskinstance.py", line 733, in _execute_callable + return ExecutionCallableRunner( + File "/opt/airflow/airflow/utils/operator_helpers.py", line 252, in run + return self.func(*args, **kwargs) + File "/opt/airflow/airflow/models/baseoperator.py", line 422, in wrapper + return func(self, *args, **kwargs) + File "/opt/airflow/airflow/operators/python.py", line 505, in execute + return super().execute(context=serializable_context) + File "/opt/airflow/airflow/models/baseoperator.py", line 422, in wrapper + return func(self, *args, **kwargs) + File "/opt/airflow/airflow/operators/python.py", line 238, in execute + return_value = self.execute_callable() + File "/opt/airflow/airflow/operators/python.py", line 870, in execute_callable + result = self._execute_python_callable_in_subprocess(python_path) + File "/opt/airflow/airflow/operators/python.py", line 588, in _execute_python_callable_in_subprocess + raise AirflowException(error_msg) from None +airflow.exceptions.AirflowException: Process returned non-zero exit status 1. 
+This is log line 1 +This is log line 2 +This is log line 3 +This is log line 4 +This is log line 5 +`; +const mockTaskLogWarning = `[2022-06-04 00:00:02,010] {taskinstance.py:1548} WARNING - Exporting env vars: AIRFLOW_CTX_DAG_OWNER=*** AIRFLOW_CTX_DAG_ID=test_ui_grid`; +const mockTaskLogInfoEndWithWarningAndUrl = `[2022-06-04 00:00:01,914] {taskinstance.py:1225} INFO - Marking task as FAILED. dag_id=reproduce_log_error_dag, task_id=reproduce_log_error_python_task2, run_id=manual__2024-11-30T02:18:22.203608+00:00, execution_date=20241130T021822, start_date=20241130T021842, end_date=20241130T021844 [2022-06-04 00:00:01,919] {standard_task_runner.py:52} INFO - Started process 41646 to run task [2022-06-04 00:00:01,920] {standard_task_runner.py:80} INFO - Running: ['***', 'tasks', 'run', 'test_ui_grid', 'section_1.get_entry_group', 'scheduled__2022-06-03T00:00:00+00:00', '--job-id', '1626', '--raw', '--subdir', 'DAGS_FOLDER/test_ui_grid.py', '--cfg-path', '/tmp/tmpte7k80ur'] [2022-06-04 00:00:01,921] {standard_task_runner.py:81} INFO - Job 1626: Subtask section_1.get_entry_group [2022-06-04 00:00:01,921] {dagbag.py:507} INFO - Filling up the DagBag from /files/dags/test_ui_grid.py [2022-06-04 00:00:01,964] {task_command.py:377} INFO - Running on host 5d28cfda3219 -[2022-06-04 00:00:02,010] {taskinstance.py:1548} WARNING - Exporting env vars: AIRFLOW_CTX_DAG_OWNER=*** AIRFLOW_CTX_DAG_ID=test_ui_grid -[2024-07-01 00:00:02,010] {taskinstance.py:1548} INFO - Url parsing test => "https://apple.com", "https://google.com", https://something.logs/_dashboard/?_g=(filters:!(),refreshInterval:(pause:!t,value:0),time:(from:now-1d,to:now))&_a=(columns:!(_source),filters:!(('$state':(store:appState)))) -`; +${mockTaskLogWarning} +[2024-07-01 00:00:02,010] {taskinstance.py:1548} INFO - Url parsing test => "https://apple.com", "https://google.com", https://something.logs/_dashboard/?_g=(filters:!(),refreshInterval:(pause:!t,value:0),time:(from:now-1d,to:now))&_a=(columns:!(_source),filters:!(('$state':(store:appState))))`; + +const mockTaskLog = `${mockTaskLogInfoBegin}${mockTaskLogErrorWithTraceback}${mockTaskLogInfoEndWithWarningAndUrl}`; +const ansiUp = new AnsiUp(); +const parseExpectedLogs = (logs: string) => { + ansiUp.url_allowlist = {}; + return logs.split("\n").map((line) => ansiUp.ansi_to_html(line)); +}; describe("Test Logs Utils.", () => { test("parseLogs function replaces datetimes", () => { @@ -65,13 +98,18 @@ describe("Test Logs Utils.", () => { test.each([ { logLevelFilters: [LogLevel.INFO], - expectedNumberOfLines: 12, + expectedNumberOfLines: 14, expectedNumberOfFileSources: 4, + expectedLogs: `${mockTaskLogInfoBegin}${mockTaskLogInfoEndWithWarningAndUrl.replace( + mockTaskLogWarning, + "" + )}`, }, { logLevelFilters: [LogLevel.WARNING], expectedNumberOfLines: 1, expectedNumberOfFileSources: 1, + expectedLogs: mockTaskLogWarning, }, ])( "Filtering logs on $logLevelFilters level should return $expectedNumberOfLines lines and $expectedNumberOfFileSources file sources", @@ -79,6 +117,7 @@ describe("Test Logs Utils.", () => { logLevelFilters, expectedNumberOfLines, expectedNumberOfFileSources, + expectedLogs, }) => { const { parsedLogs, fileSources } = parseLogs( mockTaskLog, @@ -91,8 +130,11 @@ describe("Test Logs Utils.", () => { expect(fileSources).toHaveLength(expectedNumberOfFileSources); expect(parsedLogs).toBeDefined(); const lines = parsedLogs!.split("\n"); + const expectedLines = parseExpectedLogs(expectedLogs); expect(lines).toHaveLength(expectedNumberOfLines); - lines.forEach((line) 
=> expect(line).toContain(logLevelFilters[0])); + lines.forEach((line, index) => { + expect(line).toContain(expectedLines[index]); + }); } ); @@ -104,6 +146,14 @@ describe("Test Logs Utils.", () => { ["taskinstance.py"], [] ); + const expectedLogs = `[2022-06-04 00:00:01,901] {taskinstance.py:1132} INFO - Dependencies all met for +[2022-06-04 00:00:01,906] {taskinstance.py:1132} INFO - Dependencies all met for +[2022-06-04 00:00:01,906] {taskinstance.py:1329} INFO - +[2022-06-04 00:00:01,906] {taskinstance.py:1330} INFO - Starting attempt 1 of 1 +[2022-06-04 00:00:01,906] {taskinstance.py:1331} INFO - +${mockTaskLogErrorWithTraceback} +${mockTaskLogWarning} +[2024-07-01 00:00:02,010] {taskinstance.py:1548} INFO -`; // Ignore matching for transformed hyperlinks; only verify that all the correct lines are returned. expect(fileSources).toEqual([ "dagbag.py", @@ -112,8 +162,11 @@ describe("Test Logs Utils.", () => { "taskinstance.py", ]); const lines = parsedLogs!.split("\n"); - expect(lines).toHaveLength(8); - lines.forEach((line) => expect(line).toContain("taskinstance.py")); + const expectedLines = parseExpectedLogs(expectedLogs); + expect(lines).toHaveLength(34); + lines.forEach((line, index) => { + expect(line).toContain(expectedLines[index]); + }); }); test("parseLogs function with filter on log level and file source", () => { @@ -145,7 +198,8 @@ describe("Test Logs Utils.", () => { [] ); - const lines = parsedLogs!.split("\n"); + // remove the last line which is empty + const lines = parsedLogs!.split("\n").filter((line) => line.length > 0); expect(lines[lines.length - 1]).toContain( 'https://apple.com' ); diff --git a/airflow/www/static/js/dag/details/taskInstance/Logs/utils.ts b/airflow/www/static/js/dag/details/taskInstance/Logs/utils.ts index f5340f0afb82a..b35a713484e14 100644 --- a/airflow/www/static/js/dag/details/taskInstance/Logs/utils.ts +++ b/airflow/www/static/js/dag/details/taskInstance/Logs/utils.ts @@ -59,7 +59,7 @@ export const logGroupEnd = / INFO - (::|##\[])endgroup(::|\])/g; export const parseLogs = ( data: string | undefined, timezone: string | null, - logLevelFilters: Array, + logLevelFilters: Array, fileSourceFilters: Array, unfoldedLogGroups: Array ) => { @@ -79,6 +79,8 @@ export const parseLogs = ( const parsedLines: Array = []; const fileSources: Set = new Set(); + const targetLogLevels: Set = new Set(logLevelFilters); + const targetFileSources: Set = new Set(fileSourceFilters); const ansiUp = new AnsiUp(); ansiUp.url_allowlist = {}; @@ -87,24 +89,20 @@ export const parseLogs = ( // Coloring (blue-60 as chakra style, is #0060df) and style such that log group appears like a link const logGroupStyle = "color:#0060df;cursor:pointer;font-weight:bold;"; + // Example Log Format: [2021-08-26 00:00:00,000] {filename.py:42} INFO - Log message + const regExp = /\[(.*?)\] \{(.*?)\} (.*?) -/; + let currentLevel: LogLevel = LogLevel.INFO; + let currentFileSource = ""; lines.forEach((line) => { let parsedLine = line; - - // Apply log level filter. - if ( - logLevelFilters.length > 0 && - logLevelFilters.every((level) => !line.includes(level)) - ) { - return; - } - - const regExp = /\[(.*?)\] \{(.*?)\}/; const matches = line.match(regExp); - let logGroup = ""; + let fileSource = ""; if (matches) { // Replace UTC with the local timezone. 
const dateTime = matches[1]; - [logGroup] = matches[2].split(":"); + [fileSource] = matches[2].split(":"); + const logLevel = matches[3]; + if (dateTime && timezone) { // @ts-ignore const localDateTime = moment @@ -115,50 +113,63 @@ export const parseLogs = ( parsedLine = line.replace(dateTime, localDateTime); } - fileSources.add(logGroup); + // The `currentLogLevel` and `currentFileSource` should remain same + // until a new `logLevel` or `fileSource` is encountered. + currentLevel = logLevel as LogLevel; + currentFileSource = fileSource; } + // Apply log level filter. + if (logLevelFilters.length > 0 && !targetLogLevels.has(currentLevel)) { + return; + } + if (fileSource) { + // Only add file source if it is not empty. + fileSources.add(fileSource); + } + // Apply file source filter. if ( - fileSourceFilters.length === 0 || - fileSourceFilters.some((fileSourceFilter) => - line.includes(fileSourceFilter) - ) + fileSourceFilters.length > 0 && + !targetFileSources.has(currentFileSource) ) { - parsedLine = highlightByKeywords( - parsedLine, - errorKeywords, - warningKeywords, - logGroupStart, - logGroupEnd - ); - // for lines with color convert to nice HTML - const coloredLine = ansiUp.ansi_to_html(parsedLine); - - // for lines with links, transform to hyperlinks - const lineWithHyperlinks = coloredLine - .replace( - urlRegex, - (url) => - `${url}` - ) - .replace(logGroupStart, (textLine) => { - const unfoldIdSuffix = "_unfold"; - const foldIdSuffix = "_fold"; - const gName = textLine.substring(17); - const gId = gName.replace(/\W+/g, "_").toLowerCase(); - const isFolded = unfoldedLogGroups.indexOf(gId) === -1; - const ufDisplay = isFolded ? "" : "display:none;"; - const unfold = ` ▶ ${gName}`; - const fDisplay = isFolded ? "display:none;" : ""; - const fold = ` ▼ ${gName}`; - return unfold + fold; - }) - .replace( - logGroupEnd, - " ▲▲▲ Log group end" - ); - parsedLines.push(lineWithHyperlinks); + return; } + + parsedLine = highlightByKeywords( + parsedLine, + currentLevel, + errorKeywords, + warningKeywords, + logGroupStart, + logGroupEnd + ); + // for lines with color convert to nice HTML + const coloredLine = ansiUp.ansi_to_html(parsedLine); + + // for lines with links, transform to hyperlinks + const lineWithHyperlinks = coloredLine + .replace( + urlRegex, + (url) => + `${url}` + ) + .replace(logGroupStart, (textLine) => { + const unfoldIdSuffix = "_unfold"; + const foldIdSuffix = "_fold"; + const gName = textLine.substring(17); + const gId = gName.replace(/\W+/g, "_").toLowerCase(); + const isFolded = unfoldedLogGroups.indexOf(gId) === -1; + const ufDisplay = isFolded ? "" : "display:none;"; + const unfold = ` ▶ ${gName}`; + const fDisplay = isFolded ? "display:none;" : ""; + const fold = ` ▼ ${gName}`; + return unfold + fold; + }) + .replace( + logGroupEnd, + " ▲▲▲ Log group end" + ); + parsedLines.push(lineWithHyperlinks); }); return { diff --git a/airflow/www/static/js/types/api-generated.ts b/airflow/www/static/js/types/api-generated.ts index fac52cf954732..2da17d2981d03 100644 --- a/airflow/www/static/js/types/api-generated.ts +++ b/airflow/www/static/js/types/api-generated.ts @@ -1593,6 +1593,57 @@ export interface components { TaskInstanceCollection: { task_instances?: components["schemas"]["TaskInstance"][]; } & components["schemas"]["CollectionInfo"]; + TaskInstanceHistory: { + task_id?: string; + /** + * @description Human centric display text for the task. 
+ * + * *New in version 2.9.0* + */ + task_display_name?: string; + dag_id?: string; + /** + * @description The DagRun ID for this task instance + * + * *New in version 2.3.0* + */ + dag_run_id?: string; + /** Format: datetime */ + start_date?: string | null; + /** Format: datetime */ + end_date?: string | null; + duration?: number | null; + state?: components["schemas"]["TaskState"]; + try_number?: number; + map_index?: number; + max_tries?: number; + hostname?: string; + unixname?: string; + pool?: string; + pool_slots?: number; + queue?: string | null; + priority_weight?: number | null; + /** @description *Changed in version 2.1.1*: Field becomes nullable. */ + operator?: string | null; + /** @description The datetime that the task enter the state QUEUE, also known as queue_at */ + queued_when?: string | null; + pid?: number | null; + /** + * @description Executor the task is configured to run on or None (which indicates the default executor) + * + * *New in version 2.10.0* + */ + executor?: string | null; + executor_config?: string; + }; + /** + * @description Collection of task instances . + * + * *Changed in version 2.1.0*: 'total_entries' field is added. + */ + TaskInstanceHistoryCollection: { + task_instances_history?: components["schemas"]["TaskInstanceHistory"][]; + } & components["schemas"]["CollectionInfo"]; TaskInstanceReference: { /** @description The task ID. */ task_id?: string; @@ -4355,7 +4406,7 @@ export interface operations { /** Success. */ 200: { content: { - "application/json": components["schemas"]["TaskInstance"]; + "application/json": components["schemas"]["TaskInstanceHistory"]; }; }; 401: components["responses"]["Unauthenticated"]; @@ -4396,7 +4447,7 @@ export interface operations { /** Success. */ 200: { content: { - "application/json": components["schemas"]["TaskInstanceCollection"]; + "application/json": components["schemas"]["TaskInstanceHistoryCollection"]; }; }; 401: components["responses"]["Unauthenticated"]; @@ -4439,7 +4490,7 @@ export interface operations { /** Success. */ 200: { content: { - "application/json": components["schemas"]["TaskInstanceCollection"]; + "application/json": components["schemas"]["TaskInstanceHistoryCollection"]; }; }; 401: components["responses"]["Unauthenticated"]; @@ -4471,7 +4522,7 @@ export interface operations { /** Success. 
*/ 200: { content: { - "application/json": components["schemas"]["TaskInstance"]; + "application/json": components["schemas"]["TaskInstanceHistory"]; }; }; 401: components["responses"]["Unauthenticated"]; @@ -4886,7 +4937,7 @@ export interface operations { "application/json": { content?: string; }; - "plain/text": string; + "text/plain": string; }; }; 401: components["responses"]["Unauthenticated"]; @@ -5554,6 +5605,12 @@ export type TaskInstance = CamelCasedPropertiesDeep< export type TaskInstanceCollection = CamelCasedPropertiesDeep< components["schemas"]["TaskInstanceCollection"] >; +export type TaskInstanceHistory = CamelCasedPropertiesDeep< + components["schemas"]["TaskInstanceHistory"] +>; +export type TaskInstanceHistoryCollection = CamelCasedPropertiesDeep< + components["schemas"]["TaskInstanceHistoryCollection"] +>; export type TaskInstanceReference = CamelCasedPropertiesDeep< components["schemas"]["TaskInstanceReference"] >; diff --git a/airflow/www/static/js/utils/index.test.ts b/airflow/www/static/js/utils/index.test.ts index 569d3af98b537..26b1fd6d84033 100644 --- a/airflow/www/static/js/utils/index.test.ts +++ b/airflow/www/static/js/utils/index.test.ts @@ -163,6 +163,7 @@ describe("Test highlightByKeywords", () => { const expected = `\x1b[1m\x1b[31mline with Error\x1b[39m\x1b[0m`; const highlightedLine = highlightByKeywords( originalLine, + "", ["error"], ["warn"], logGroupStart, @@ -175,6 +176,7 @@ describe("Test highlightByKeywords", () => { const expected = `\x1b[1m\x1b[33mline with Warning\x1b[39m\x1b[0m`; const highlightedLine = highlightByKeywords( originalLine, + "", ["error"], ["warn"], logGroupStart, @@ -187,6 +189,7 @@ describe("Test highlightByKeywords", () => { const expected = `\x1b[1m\x1b[31mline with error Warning\x1b[39m\x1b[0m`; const highlightedLine = highlightByKeywords( originalLine, + "", ["error"], ["warn"], logGroupStart, @@ -198,6 +201,7 @@ describe("Test highlightByKeywords", () => { const originalLine = " INFO - ::group::error"; const highlightedLine = highlightByKeywords( originalLine, + "", ["error"], ["warn"], logGroupStart, @@ -209,6 +213,7 @@ describe("Test highlightByKeywords", () => { const originalLine = " INFO - ::endgroup::"; const highlightedLine = highlightByKeywords( originalLine, + "", ["endgroup"], ["warn"], logGroupStart, @@ -220,6 +225,7 @@ describe("Test highlightByKeywords", () => { const originalLine = "sample line"; const highlightedLine = highlightByKeywords( originalLine, + "", ["error"], ["warn"], logGroupStart, diff --git a/airflow/www/static/js/utils/index.ts b/airflow/www/static/js/utils/index.ts index 8bef31a8582a9..2742612dc8a05 100644 --- a/airflow/www/static/js/utils/index.ts +++ b/airflow/www/static/js/utils/index.ts @@ -20,6 +20,7 @@ import Color from "color"; import type { DagRun, RunOrdering, Task, TaskInstance } from "src/types"; +import { LogLevel } from "src/dag/details/taskInstance/Logs/utils"; import useOffsetTop from "./useOffsetTop"; // Delay in ms for various hover actions @@ -187,6 +188,7 @@ const toSentenceCase = (camelCase: string): string => { const highlightByKeywords = ( parsedLine: string, + currentLogLevel: string, errorKeywords: string[], warningKeywords: string[], logGroupStart: RegExp, @@ -205,7 +207,7 @@ const highlightByKeywords = ( lowerParsedLine.includes(keyword) ); - if (containsError) { + if (containsError || currentLogLevel === (LogLevel.ERROR as string)) { return red(parsedLine); } @@ -213,7 +215,7 @@ const highlightByKeywords = ( lowerParsedLine.includes(keyword) ); - if 
(containsWarning) { + if (containsWarning || currentLogLevel === (LogLevel.WARNING as string)) { return yellow(parsedLine); } diff --git a/airflow/www/views.py b/airflow/www/views.py index 5e8ef6bb7f08c..92fa534f57191 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -316,7 +316,10 @@ def dag_to_grid(dag: DagModel, dag_runs: Sequence[DagRun], session: Session) -> TaskInstance.task_id, TaskInstance.run_id, TaskInstance.state, - TaskInstance.try_number, + case( + (TaskInstance.map_index == -1, TaskInstance.try_number), + else_=None, + ).label("try_number"), func.min(TaskInstanceNote.content).label("note"), func.count(func.coalesce(TaskInstance.state, sqla.literal("no_status"))).label("state_count"), func.min(TaskInstance.queued_dttm).label("queued_dttm"), @@ -328,7 +331,15 @@ def dag_to_grid(dag: DagModel, dag_runs: Sequence[DagRun], session: Session) -> TaskInstance.dag_id == dag.dag_id, TaskInstance.run_id.in_([dag_run.run_id for dag_run in dag_runs]), ) - .group_by(TaskInstance.task_id, TaskInstance.run_id, TaskInstance.state, TaskInstance.try_number) + .group_by( + TaskInstance.task_id, + TaskInstance.run_id, + TaskInstance.state, + case( + (TaskInstance.map_index == -1, TaskInstance.try_number), + else_=None, + ), + ) .order_by(TaskInstance.task_id, TaskInstance.run_id) ) @@ -4024,6 +4035,17 @@ class XComModelView(AirflowModelView): list_columns = ["key", "value", "timestamp", "dag_id", "task_id", "run_id", "map_index", "execution_date"] base_order = ("dag_run_id", "desc") + order_columns = [ + "key", + "value", + "timestamp", + "dag_id", + "task_id", + "run_id", + "map_index", + # "execution_date", # execution_date sorting is not working and crashing the UI, disabled for now. + ] + base_filters = [["dag_id", DagFilter, list]] formatters_columns = { @@ -4335,6 +4357,8 @@ def process_form(self, form, is_created): # value isn't an empty string. if value != "": extra[field_name] = value + elif field_name in extra: + del extra[field_name] if extra.keys(): sensitive_unchanged_keys = set() for key, value in extra.items(): diff --git a/chart/values.schema.json b/chart/values.schema.json index 49cadfbb64118..4aea4ab7c8915 100644 --- a/chart/values.schema.json +++ b/chart/values.schema.json @@ -671,7 +671,7 @@ "tag": { "description": "The StatsD image tag.", "type": "string", - "default": "v0.27.2" + "default": "v0.28.0" }, "pullPolicy": { "description": "The StatsD image pull policy.", diff --git a/chart/values.yaml b/chart/values.yaml index 13f7f455ebb74..ef242b4cc8de9 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -105,7 +105,7 @@ images: pullPolicy: IfNotPresent statsd: repository: quay.io/prometheus/statsd-exporter - tag: v0.27.2 + tag: v0.28.0 pullPolicy: IfNotPresent redis: repository: redis diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml index 1a5ccdc9e2b63..6ae2b49da6b28 100644 --- a/clients/python/pyproject.toml +++ b/clients/python/pyproject.toml @@ -16,7 +16,7 @@ # under the License. [build-system] -requires = ["hatchling==1.25.0"] +requires = ["hatchling==1.26.3"] build-backend = "hatchling.build" [project] diff --git a/contributing-docs/03_contributors_quick_start.rst b/contributing-docs/03_contributors_quick_start.rst index eb84bb668a78b..e663cf2ed19f1 100644 --- a/contributing-docs/03_contributors_quick_start.rst +++ b/contributing-docs/03_contributors_quick_start.rst @@ -451,6 +451,39 @@ tests are applied when you commit your code. 
 To avoid burden on CI infrastructure and to save time, Pre-commit hooks can be run locally before committing changes.
+.. note::
+
+    We have recently started to recommend ``uv`` for our local development. Currently (October 2024) ``uv``
+    speeds up installation more than 10x compared to ``pip``. While we still describe ``pip`` and ``pipx``
+    below, we also show the ``uv`` alternatives.
+
+.. note::
+
+    Remember to have your global Python set to Python >= 3.9 - Python 3.8 is already end-of-life and we've
+    started to use Python 3.9+ features in Airflow and accompanying scripts.
+
+
+Installing pre-commit is best done with ``pipx``:
+
+.. code-block:: bash
+
+    pipx install pre-commit
+
+If you use ``pipx``, you can still add ``uv`` support for pre-commit using these commands:
+
+.. code-block:: bash
+
+    pipx install pre-commit
+    pipx inject pre-commit pre-commit-uv
+
+Also, if you already use ``uvx`` instead of ``pipx``, use this command:
+
+.. code-block:: bash
+
+    uv tool install pre-commit --with pre-commit-uv --force-reinstall
+
+
 1. Installing required packages on Debian / Ubuntu, install via
diff --git a/contributing-docs/testing/unit_tests.rst b/contributing-docs/testing/unit_tests.rst
index c83f391e52817..ccd38250424df 100644
--- a/contributing-docs/testing/unit_tests.rst
+++ b/contributing-docs/testing/unit_tests.rst
@@ -378,10 +378,10 @@ If your test accesses the database but is not marked properly the Non-DB test in
 How to verify if DB test is correctly classified
 ................................................
-When you add if you want to see if your DB test is correctly classified, you can run the test or group
+If you want to see if your DB test is correctly classified, you can run the test or group
 of tests with ``--skip-db-tests`` flag.
-You can run the all (or subset of) test types if you want to make sure all ot the problems are fixed
+You can run all (or a subset of) the test types if you want to make sure all of the problems are fixed
 .. code-block:: bash
@@ -502,8 +502,8 @@ Do this:
 Problems with Non-DB test collection
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Sometimes, even if whole module is marked as ``@pytest.mark.db_test`` even parsing the file and collecting
-tests will fail when ``--skip-db-tests`` is used because some of the imports od objects created in the
+Sometimes, even if the whole module is marked as ``@pytest.mark.db_test``, parsing the file and collecting
+tests will fail when ``--skip-db-tests`` is used because some of the imports or objects created in the
 module will read the database.
 Usually what helps is to move such initialization code to inside the tests or pytest fixtures (and pass
@@ -1162,9 +1162,9 @@ directly to the container.
 Implementing compatibility for provider tests for older Airflow versions
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-When you implement tests for providers, you should make sure that they are compatible with older
+When you implement tests for providers, you should make sure that they are compatible with older
 Airflow versions.
-Note that some of the tests if written without taking care about the compatibility, might not work with older
+Note that some of the tests, if written without taking care about the compatibility, might not work with older
 versions of Airflow - this is because of refactorings, renames, and tests relying on internals of Airflow that are not part of the public API.
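For illustration, a hedged sketch of one common way to guard such a provider test (assuming ``pytest`` and ``packaging`` are available; the ``2.9.0`` threshold below is only a placeholder for whatever feature the test needs):

```python
# Hypothetical example: skip a provider test when it runs against an older Airflow version.
import pytest
from packaging.version import Version

from airflow import __version__ as AIRFLOW_VERSION

pytestmark = pytest.mark.skipif(
    Version(AIRFLOW_VERSION) < Version("2.9.0"),
    reason="The interface exercised by this test only exists in Airflow 2.9+",
)


def test_provider_feature_on_new_airflow():
    # The test body would rely on the newer Airflow interface here.
    assert True
```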
 We deal with it in one of the following ways:
diff --git a/dev/README_RELEASE_PROVIDER_PACKAGES.md b/dev/README_RELEASE_PROVIDER_PACKAGES.md
index 749f89e106320..25aa8062c7722 100644
--- a/dev/README_RELEASE_PROVIDER_PACKAGES.md
+++ b/dev/README_RELEASE_PROVIDER_PACKAGES.md
@@ -335,7 +335,6 @@ export AIRFLOW_REPO_ROOT=$(pwd -P)
 rm -rf ${AIRFLOW_REPO_ROOT}/dist/*
 ```
-
 * Release candidate packages:
 ```shell script
diff --git a/dev/breeze/README.md b/dev/breeze/README.md
index 2c38aa7c1a95e..713bf7ce83fd3 100644
--- a/dev/breeze/README.md
+++ b/dev/breeze/README.md
@@ -66,6 +66,6 @@ PLEASE DO NOT MODIFY THE HASH BELOW! IT IS AUTOMATICALLY UPDATED BY PRE-COMMIT.
 ---------------------------------------------------------------------------------------------------------
-Package config hash: f8e8729f4236f050d4412cbbc9d53fdd4e6ddad65ce5fafd3c5b6fcdacbea5431eea760b961534a63fd5733b072b38e8167b5b0c12ee48b31c3257306ef11940
+Package config hash: d1d07397099e14c5fc5f0b0e13a87ac8e112bf66755f77cee62b29151cd18c2f2d35932906db6b3885af652defddce696ef9b2df58e21bd3a7749bca82baf910
 ---------------------------------------------------------------------------------------------------------
diff --git a/dev/breeze/doc/01_installation.rst b/dev/breeze/doc/01_installation.rst
index 6ff68d2bb6455..aad8640e7f60c 100644
--- a/dev/breeze/doc/01_installation.rst
+++ b/dev/breeze/doc/01_installation.rst
@@ -151,13 +151,28 @@ Docker in WSL 2
 If VS Code is installed on the Windows host system then in the WSL Linux Distro you can run ``code .`` in
 the root directory of your Airflow repo to launch VS Code.
-The pipx tool
--------------
+The uv tool
+-----------
+
+We recommend using the ``uv`` tool to manage your virtual environments and, more generally, as a Swiss-army knife
+for your Python environment (it supports installing various versions of Python, creating virtual environments,
+installing packages, managing workspaces and running development tools).
+
+Installing ``uv`` is described in the `uv documentation `_.
+We highly recommend using ``uv`` to manage your Python environments, as it is very comprehensive,
+easy to use, faster than any of the other tools available (way faster!), and has a lot of features
+that make it easier to work with Python.
+
+Alternative: pipx tool
+----------------------
-We are using ``pipx`` tool to install and manage Breeze. The ``pipx`` tool is created by the creators
+However, we do not want to be entirely dependent on ``uv``, as it is software governed by a VC-backed vendor,
+so we always want to provide open-source-governed alternatives for our tools. If you can't or do not want to
+use ``uv``, we have you covered. Another tool you can use to manage development tools (and the ``breeze``
+development environment) is the Python-Software-Foundation-managed ``pipx``. The ``pipx`` tool is created by the creators
 of ``pip`` from `Python Packaging Authority `_
-Note that ``pipx`` >= 1.4.1 is used.
+Note that ``pipx`` >= 1.4.1 should be used.
 Install pipx
@@ -172,7 +187,7 @@ environments. This can be done automatically by the following command (follow in pipx ensurepath
-In Mac
+In case ``pipx`` is not in your PATH, you can run it as a Python module:
 .. code-block:: bash
diff --git a/dev/breeze/doc/ci/02_images.md b/dev/breeze/doc/ci/02_images.md
index 19c58ebc2d2d9..1db263f8b3aa0 100644
--- a/dev/breeze/doc/ci/02_images.md
+++ b/dev/breeze/doc/ci/02_images.md
@@ -421,36 +421,39 @@ DOCKER_BUILDKIT=1 docker build .
-f Dockerfile.ci \ The following build arguments (`--build-arg` in docker build command) can be used for CI images: -| Build argument | Default value | Description | -|-----------------------------------|-------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `PYTHON_BASE_IMAGE` | `python:3.8-slim-bookworm` | Base Python image | -| `PYTHON_MAJOR_MINOR_VERSION` | `3.8` | major/minor version of Python (should match base image) | -| `DEPENDENCIES_EPOCH_NUMBER` | `2` | increasing this number will reinstall all apt dependencies | -| `ADDITIONAL_PIP_INSTALL_FLAGS` | | additional `pip` flags passed to the installation commands (except when reinstalling `pip` itself) | -| `PIP_NO_CACHE_DIR` | `true` | if true, then no pip cache will be stored | -| `UV_NO_CACHE` | `true` | if true, then no uv cache will be stored | -| `HOME` | `/root` | Home directory of the root user (CI image has root user as default) | -| `AIRFLOW_HOME` | `/root/airflow` | Airflow's HOME (that's where logs and sqlite databases are stored) | -| `AIRFLOW_SOURCES` | `/opt/airflow` | Mounted sources of Airflow | -| `AIRFLOW_REPO` | `apache/airflow` | the repository from which PIP dependencies are pre-installed | -| `AIRFLOW_BRANCH` | `main` | the branch from which PIP dependencies are pre-installed | -| `AIRFLOW_CI_BUILD_EPOCH` | `1` | increasing this value will reinstall PIP dependencies from the repository from scratch | -| `AIRFLOW_CONSTRAINTS_LOCATION` | | If not empty, it will override the source of the constraints with the specified URL or file. | -| `AIRFLOW_CONSTRAINTS_REFERENCE` | | reference (branch or tag) from GitHub repository from which constraints are used. By default it is set to `constraints-main` but can be `constraints-2-X`. | -| `AIRFLOW_EXTRAS` | `all` | extras to install | -| `UPGRADE_INVALIDATION_STRING` | | If set to any random value the dependencies are upgraded to newer versions. In CI it is set to build id. | -| `AIRFLOW_PRE_CACHED_PIP_PACKAGES` | `true` | Allows to pre-cache airflow PIP packages from the GitHub of Apache Airflow This allows to optimize iterations for Image builds and speeds up CI jobs. | -| `ADDITIONAL_AIRFLOW_EXTRAS` | | additional extras to install | -| `ADDITIONAL_PYTHON_DEPS` | | additional Python dependencies to install | -| `DEV_APT_COMMAND` | | Dev apt command executed before dev deps are installed in the first part of image | -| `ADDITIONAL_DEV_APT_COMMAND` | | Additional Dev apt command executed before dev dep are installed in the first part of the image | -| `DEV_APT_DEPS` | Empty - install default dependencies (see `install_os_dependencies.sh`) | Dev APT dependencies installed in the first part of the image | -| `ADDITIONAL_DEV_APT_DEPS` | | Additional apt dev dependencies installed in the first part of the image | -| `ADDITIONAL_DEV_APT_ENV` | | Additional env variables defined when installing dev deps | -| `AIRFLOW_PIP_VERSION` | `24.0` | PIP version used. | -| `AIRFLOW_UV_VERSION` | `0.1.10` | UV version used. | -| `AIRFLOW_USE_UV` | `true` | Whether to use UV for installation. 
 |
-| `PIP_PROGRESS_BAR` | `on` | Progress bar for PIP installation |
+| Build argument | Default value | Description |
+|----------------|---------------|-------------|
+| `PYTHON_BASE_IMAGE` | `python:3.8-slim-bookworm` | Base Python image |
+| `PYTHON_MAJOR_MINOR_VERSION` | `3.8` | major/minor version of Python (should match base image) |
+| `DEPENDENCIES_EPOCH_NUMBER` | `2` | increasing this number will reinstall all apt dependencies |
+| `ADDITIONAL_PIP_INSTALL_FLAGS` | | additional `pip` flags passed to the installation commands (except when reinstalling `pip` itself) |
+| `PIP_NO_CACHE_DIR` | `true` | if true, then no pip cache will be stored |
+| `UV_NO_CACHE` | `true` | if true, then no uv cache will be stored |
+| `HOME` | `/root` | Home directory of the root user (CI image has root user as default) |
+| `AIRFLOW_HOME` | `/root/airflow` | Airflow's HOME (that's where logs and sqlite databases are stored) |
+| `AIRFLOW_SOURCES` | `/opt/airflow` | Mounted sources of Airflow |
+| `AIRFLOW_REPO` | `apache/airflow` | the repository from which PIP dependencies are pre-installed |
+| `AIRFLOW_BRANCH` | `main` | the branch from which PIP dependencies are pre-installed |
+| `AIRFLOW_CI_BUILD_EPOCH` | `1` | increasing this value will reinstall PIP dependencies from the repository from scratch |
+| `AIRFLOW_CONSTRAINTS_LOCATION` | | If not empty, it will override the source of the constraints with the specified URL or file. |
+| `AIRFLOW_CONSTRAINTS_REFERENCE` | | reference (branch or tag) from GitHub repository from which constraints are used. By default it is set to `constraints-main` but can be `constraints-2-X`. |
+| `AIRFLOW_EXTRAS` | `all` | extras to install |
+| `UPGRADE_INVALIDATION_STRING` | | If set to any random value the dependencies are upgraded to newer versions. In CI it is set to build id. |
+| `AIRFLOW_PRE_CACHED_PIP_PACKAGES` | `true` | Allows to pre-cache airflow PIP packages from the GitHub of Apache Airflow This allows to optimize iterations for Image builds and speeds up CI jobs. |
+| `ADDITIONAL_AIRFLOW_EXTRAS` | | additional extras to install |
+| `ADDITIONAL_PYTHON_DEPS` | | additional Python dependencies to install |
+| `DEV_APT_COMMAND` | | Dev apt command executed before dev deps are installed in the first part of image |
+| `ADDITIONAL_DEV_APT_COMMAND` | | Additional Dev apt command executed before dev dep are installed in the first part of the image |
+| `DEV_APT_DEPS` | | Dev APT dependencies installed in the first part of the image |
+| `ADDITIONAL_DEV_APT_DEPS` | | Additional apt dev dependencies installed in the first part of the image |
+| `ADDITIONAL_DEV_APT_ENV` | | Additional env variables defined when installing dev deps |
+| `AIRFLOW_PIP_VERSION` | `24.3.1` | PIP version used. |
+| `AIRFLOW_UV_VERSION` | `0.4.29` | UV version used. |
+| `AIRFLOW_USE_UV` | `true` | Whether to use UV for installation. |
+| `PIP_PROGRESS_BAR` | `on` | Progress bar for PIP installation |
+
+
 Here are some examples of how CI images can be built manually. CI is always built from local sources.
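For example, a hedged sketch of such a manual CI image build, overriding a few of the build args from the table above (the chosen arguments and the image tag are placeholders only):

```bash
# Illustrative only: build the CI image from local sources, customizing a few build args.
DOCKER_BUILDKIT=1 docker build . -f Dockerfile.ci \
    --pull \
    --build-arg PYTHON_BASE_IMAGE="python:3.8-slim-bookworm" \
    --build-arg AIRFLOW_USE_UV="true" \
    --build-arg ADDITIONAL_AIRFLOW_EXTRAS="jdbc" \
    --tag my-ci-image:0.0.1
```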
diff --git a/dev/breeze/doc/ci/04_selective_checks.md b/dev/breeze/doc/ci/04_selective_checks.md index 819633d4c59ee..3f8d8a97fae03 100644 --- a/dev/breeze/doc/ci/04_selective_checks.md +++ b/dev/breeze/doc/ci/04_selective_checks.md @@ -201,7 +201,7 @@ Github Actions to pass the list of parameters to a command to execute | kubernetes-combos-list-as-string | All combinations of Python version and Kubernetes version to use for tests as space-separated string | 3.8-v1.25.2 3.9-v1.26.4 | * | | kubernetes-versions | All Kubernetes versions to use for tests as JSON array | ['v1.25.2'] | | | kubernetes-versions-list-as-string | All Kubernetes versions to use for tests as space-separated string | v1.25.2 | * | -| mypy-folders | List of folders to be considered for mypy | [] | | +| mypy-checks | List of folders to be considered for mypy | [] | | | mysql-exclude | Which versions of MySQL to exclude for tests as JSON array | [] | | | mysql-versions | Which versions of MySQL to use for tests as JSON array | ['5.7'] | | | needs-api-codegen | Whether "api-codegen" are needed to run ("true"/"false") | true | | diff --git a/dev/breeze/doc/images/output-commands.svg b/dev/breeze/doc/images/output-commands.svg index 08d3dc2a13eea..5888d1fc862eb 100644 --- a/dev/breeze/doc/images/output-commands.svg +++ b/dev/breeze/doc/images/output-commands.svg @@ -298,53 +298,53 @@ Usage:breeze[OPTIONSCOMMAND [ARGS]... ╭─ Execution mode â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--python-pPython major/minor version used in Airflow image for images.│ +│--python-pPython major/minor version used in Airflow image for images.│ │(>3.8< | 3.9 | 3.10 | 3.11 | 3.12)                          â”‚ │[default: 3.8]                                              â”‚ -│--integrationIntegration(s) to enable when running (can be more than one).                       â”‚ +│--integrationIntegration(s) to enable when running (can be more than one).                       â”‚ │(all | all-testable | cassandra | celery | drill | kafka | kerberos | mongo | mssql â”‚ │| openlineage | otel | pinot | qdrant | redis | statsd | trino | ydb)               â”‚ -│--standalone-dag-processorRun standalone dag processor for start-airflow.│ -│--database-isolationRun airflow in database isolation mode.│ +│--standalone-dag-processorRun standalone dag processor for start-airflow.│ +│--database-isolationRun airflow in database isolation mode.│ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Docker Compose selection and cleanup â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--project-nameName of the docker-compose project to bring down. The `docker-compose` is for legacy breeze       â”‚ -│project name and you can use `breeze down --project-name docker-compose` to stop all containers   â”‚ +│--project-nameName of the docker-compose project to bring down. The `docker-compose` is for legacy breeze       â”‚ +│project name and you can use `breeze down --project-name docker-compose` to stop all containers   â”‚ │belonging to it.               
                                                                   â”‚ │(breeze | pre-commit | docker-compose)                                                            â”‚ │[default: breeze]                                                                                 â”‚ -│--docker-hostOptional - docker host to use when running docker commands. When set, the `--builder` option is   â”‚ +│--docker-hostOptional - docker host to use when running docker commands. When set, the `--builder` option is   â”‚ │ignored when building images.                                                                     â”‚ │(TEXT)                                                                                            â”‚ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Database â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--backend-bDatabase backend to use. If 'none' is chosen, Breeze will start with an invalid database    â”‚ +│--backend-bDatabase backend to use. If 'none' is chosen, Breeze will start with an invalid database    â”‚ │configuration, meaning there will be no database available, and any attempts to connect to  â”‚ │the Airflow database will fail.                                                             â”‚ │(>sqlite< | mysql | postgres | none)                                                        â”‚ │[default: sqlite]                                                                           â”‚ -│--postgres-version-PVersion of Postgres used.(>12< | 13 | 14 | 15 | 16)[default: 12]│ -│--mysql-version-MVersion of MySQL used.(>8.0< | 8.4)[default: 8.0]│ -│--db-reset-dReset DB when entering the container.│ +│--postgres-version-PVersion of Postgres used.(>12< | 13 | 14 | 15 | 16)[default: 12]│ +│--mysql-version-MVersion of MySQL used.(>8.0< | 8.4)[default: 8.0]│ +│--db-reset-dReset DB when entering the container.│ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Build CI image (before entering shell) â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│ -│--builderBuildx builder used to perform `docker buildx build` commands.(TEXT)│ +│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│ +│--builderBuildx builder used to perform `docker buildx build` commands.(TEXT)│ │[default: autodetect]                                         â”‚ -│--use-uv/--no-use-uvUse uv instead of pip as packaging tool to build the image.[default: use-uv]│ -│--uv-http-timeoutTimeout for requests that UV makes (only used in case of UV builds).(INTEGER RANGE)│ +│--use-uv/--no-use-uvUse uv instead of pip as packaging tool to build the image.[default: use-uv]│ +│--uv-http-timeoutTimeout for requests that UV makes (only used in case of UV builds).(INTEGER RANGE)│ │[default: 300; x>=1]                                                â”‚ 
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Other options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--forward-credentials-fForward local credentials to container when running.│ -│--max-timeMaximum time that the command should take - if it takes longer, the command will fail.│ +│--forward-credentials-fForward local credentials to container when running.│ +│--max-timeMaximum time that the command should take - if it takes longer, the command will fail.│ │(INTEGER RANGE)                                                                       â”‚ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--answer-aForce answer to questions.(y | n | q | yes | no | quit)│ -│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ -│--verbose-vPrint verbose information about performed steps.│ -│--help-hShow this message and exit.│ +│--answer-aForce answer to questions.(y | n | q | yes | no | quit)│ +│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ +│--verbose-vPrint verbose information about performed steps.│ +│--help-hShow this message and exit.│ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Developer commands â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® │start-airflow          Enter breeze environment and starts all Airflow components in the tmux session. Compile    â”‚ diff --git a/dev/breeze/doc/images/output_build-docs.svg b/dev/breeze/doc/images/output_build-docs.svg index e270c6b92e997..6fa9017144472 100644 --- a/dev/breeze/doc/images/output_build-docs.svg +++ b/dev/breeze/doc/images/output_build-docs.svg @@ -203,32 +203,32 @@ Build documents. ╭─ Doc flags â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--docs-only-dOnly build documentation.│ -│--spellcheck-only-sOnly run spell checking.│ -│--clean-buildClean inventories of Inter-Sphinx documentation and generated APIs and sphinx    â”‚ +│--docs-only-dOnly build documentation.│ +│--spellcheck-only-sOnly run spell checking.│ +│--clean-buildClean inventories of Inter-Sphinx documentation and generated APIs and sphinx    â”‚ │artifacts before the build - useful for a clean build.                           â”‚ -│--one-pass-onlyBuilds documentation in one pass only. 
This is useful for debugging sphinx       â”‚ +│--one-pass-onlyBuilds documentation in one pass only. This is useful for debugging sphinx       â”‚ │errors.                                                                          â”‚ -│--package-filterFilter(s) to use more than one can be specified. You can use glob pattern        â”‚ +│--package-filterFilter(s) to use more than one can be specified. You can use glob pattern        â”‚ │matching the full package name, for example `apache-airflow-providers-*`. Useful â”‚ │when you want to selectseveral similarly named packages together.                â”‚ │(TEXT)                                                                           â”‚ -│--include-not-ready-providersWhether to include providers that are not yet ready to be released.│ -│--include-removed-providersWhether to include providers that are removed.│ -│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│ -│--builderBuildx builder used to perform `docker buildx build` commands.(TEXT)│ +│--include-not-ready-providersWhether to include providers that are not yet ready to be released.│ +│--include-removed-providersWhether to include providers that are removed.│ +│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│ +│--builderBuildx builder used to perform `docker buildx build` commands.(TEXT)│ │[default: autodetect]                                         â”‚ -│--package-listOptional, contains comma-separated list of package ids that are processed for    â”‚ +│--package-listOptional, contains comma-separated list of package ids that are processed for    â”‚ │documentation building, and document publishing. It is an easier alternative to  â”‚ │adding individual packages as arguments to every command. This overrides the     â”‚ │packages passed as arguments.                                                    
â”‚ │(TEXT)                                                                           â”‚ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ -│--verbose-vPrint verbose information about performed steps.│ -│--answer-aForce answer to questions.(y | n | q | yes | no | quit)│ -│--help-hShow this message and exit.│ +│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ +│--verbose-vPrint verbose information about performed steps.│ +│--answer-aForce answer to questions.(y | n | q | yes | no | quit)│ +│--help-hShow this message and exit.│ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/dev/breeze/doc/images/output_build-docs.txt b/dev/breeze/doc/images/output_build-docs.txt index 760b6b3d09826..85554fb426c0a 100644 --- a/dev/breeze/doc/images/output_build-docs.txt +++ b/dev/breeze/doc/images/output_build-docs.txt @@ -1 +1 @@ -ac6594538890f8fba65c916aa8672aa1 +91166ce4114ea9c162c139d2aff15886 diff --git a/dev/breeze/doc/images/output_prod-image.svg b/dev/breeze/doc/images/output_prod-image.svg index 6b907c07a6b27..ef8e95626d14a 100644 --- a/dev/breeze/doc/images/output_prod-image.svg +++ b/dev/breeze/doc/images/output_prod-image.svg @@ -98,7 +98,7 @@ Tools that developers can use to manually manage PROD images ╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--help-hShow this message and exit.│ +│--help-hShow this message and exit.│ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Production Image tools â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® │build  Build Production image. Include building multiple images for all or selected Python versions sequentially. 
diff --git a/dev/breeze/doc/images/output_prod-image.txt b/dev/breeze/doc/images/output_prod-image.txt
index 4e4ac97bd602d..c767ee09d4fd3 100644
--- a/dev/breeze/doc/images/output_prod-image.txt
+++ b/dev/breeze/doc/images/output_prod-image.txt
@@ -1 +1 @@
-55030fe0d7718eb668fa1a37128647b0
+d91bcc76b14f186e749efe2c6aaa8682
diff --git a/dev/breeze/doc/images/output_prod-image_build.txt b/dev/breeze/doc/images/output_prod-image_build.txt
index e1e2a2c9c6c7f..1645f4d547baa 100644
--- a/dev/breeze/doc/images/output_prod-image_build.txt
+++ b/dev/breeze/doc/images/output_prod-image_build.txt
@@ -1 +1 @@
-88290b22adcd4e5cc9da29aaa8467992
+c243f4de16bc858f6202d88922f00109
diff --git a/dev/breeze/doc/images/output_setup.svg b/dev/breeze/doc/images/output_setup.svg
index c747a1eea7f38..5dda408adefbc 100644
--- a/dev/breeze/doc/images/output_setup.svg
+++ b/dev/breeze/doc/images/output_setup.svg
@@ -110,7 +110,7 @@ Tools that developers can use to configure Breeze
 ╭─ Common options ────────────────────────────────────────────────────────────────────────────────────╮
-│--help-hShow this message and exit.│
+│--help-hShow this message and exit.│
 ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
 ╭─ Setup ────────────────────────────────────────────────────────────────────────────────────╮
 │autocomplete                   Enables autocompletion of breeze commands.                                         │
diff --git a/dev/breeze/doc/images/output_setup.txt b/dev/breeze/doc/images/output_setup.txt
index b8f9048b91f0b..274751197daaf 100644
--- a/dev/breeze/doc/images/output_setup.txt
+++ b/dev/breeze/doc/images/output_setup.txt
@@ -1 +1 @@
-d4a4f1b405f912fa234ff4116068290a
+08c78d9dddd037a2ade6b751c5a22ff9
diff --git a/dev/breeze/doc/images/output_setup_autocomplete.svg b/dev/breeze/doc/images/output_setup_autocomplete.svg
index e118e1fced9a8..31f7814001faa 100644
--- a/dev/breeze/doc/images/output_setup_autocomplete.svg
+++ b/dev/breeze/doc/images/output_setup_autocomplete.svg
@@ -102,13 +102,13 @@ Enables autocompletion of breeze commands.
╭─ Setup autocomplete flags â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--force-fForce autocomplete setup even if already setup before (overrides the setup).│ +│--force-fForce autocomplete setup even if already setup before (overrides the setup).│ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--verbose-vPrint verbose information about performed steps.│ -│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ -│--answer-aForce answer to questions.(y | n | q | yes | no | quit)│ -│--help-hShow this message and exit.│ +│--verbose-vPrint verbose information about performed steps.│ +│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ +│--answer-aForce answer to questions.(y | n | q | yes | no | quit)│ +│--help-hShow this message and exit.│ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/dev/breeze/doc/images/output_setup_autocomplete.txt b/dev/breeze/doc/images/output_setup_autocomplete.txt index 185feef026464..144c2613cd695 100644 --- a/dev/breeze/doc/images/output_setup_autocomplete.txt +++ b/dev/breeze/doc/images/output_setup_autocomplete.txt @@ -1 +1 @@ -fffcd49e102e09ccd69b3841a9e3ea8e +ec3b4541a478afe5cb86a6f1c48f50f5 diff --git a/dev/breeze/doc/images/output_setup_config.svg b/dev/breeze/doc/images/output_setup_config.svg index 9a42467ea5281..5a44bb20030b9 100644 --- a/dev/breeze/doc/images/output_setup_config.svg +++ b/dev/breeze/doc/images/output_setup_config.svg @@ -1,4 +1,4 @@ - +