diff --git a/.github/.pre-commit-config.yaml b/.github/.pre-commit-config.yaml new file mode 100644 index 0000000000000..909f0c1cdca3c --- /dev/null +++ b/.github/.pre-commit-config.yaml @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +--- +default_stages: [manual] +default_language_version: + python: python311 +minimum_pre_commit_version: '3.2.0' +repos: + - repo: https://github.com/eclipse-csi/octopin + rev: 21360742e352e87450f99e180fdfc2cf774a72a3 + hooks: + - id: pin-versions + name: Pin versions of dependencies in CI workflows (manual) + stages: ['manual'] diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 7a97a52539cac..69f10c58301a7 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -23,7 +23,7 @@ # API /airflow-core/src/airflow/api/ @ephraimbuddy @pierrejeambrun @rawwar @jason810496 -/airflow-core/src/airflow/api_fastapi/ @ephraimbuddy @pierrejeambrun @rawwar @jason810496 @bugraoz93 +/airflow-core/src/airflow/api_fastapi/ @ephraimbuddy @pierrejeambrun @rawwar @jason810496 @bugraoz93 @shubhamraj-git /airflow-core/src/airflow/api_fastapi/execution_api/ @ashb @kaxil @amoghrajesh # Airflow CTL @@ -33,7 +33,18 @@ /airflow-core/src/airflow/api_fastapi/auth/ @vincbeck # UI -/airflow-core/src/airflow/ui/ @bbovenzi @pierrejeambrun @ryanahamilton @jscheffl +/airflow-core/src/airflow/ui/ @bbovenzi @pierrejeambrun @ryanahamilton @jscheffl @shubhamraj-git + +# Translation Owners (i18n) +# Note: Non-committer engaged translators are listed in comments to prevent making the file syntax invalid +# See: https://github.com/apache/airflow/blob/main/airflow-core/src/airflow/ui/public/i18n/README.md#43-engaged-translator +airflow-core/src/airflow/ui/public/i18n/locales/ar/ @shahar1 @hussein-awala # + @ahmadtfarhan +airflow-core/src/airflow/ui/public/i18n/locales/de/ @jscheffl # + @TJaniF @m1racoli +airflow-core/src/airflow/ui/public/i18n/locales/he/ @eladkal @shahar1 @romsharon98 # +@Dev-iL +airflow-core/src/airflow/ui/public/i18n/locales/nl/ @BasPH # + @DjVinnii +airflow-core/src/airflow/ui/public/i18n/locales/pl/ @potiuk @mobuchowski # + @kacpermuda +airflow-core/src/airflow/ui/public/i18n/locales/zh-TW/ @Lee-W @jason810496 # + @RoyLee1224 @guan404ming +airflow-core/src/airflow/ui/public/i18n/locales/fr/ @pierrejeambrun @vincbeck # Security/Permissions /airflow-core/src/airflow/security/permissions.py @vincbeck @@ -69,6 +80,7 @@ /providers/edge3/ @jscheffl /providers/fab/ @vincbeck /providers/hashicorp/ @hussein-awala +/providers/keycloak/ @vincbeck /providers/openlineage/ @mobuchowski /providers/slack/ @eladkal /providers/smtp/ @hussein-awala @@ -77,7 +89,7 @@ # Dev tools /.github/workflows/ @potiuk @ashb @gopidesupavan -/dev/ @potiuk @ashb @jedcunningham @gopidesupavan +/dev/ @potiuk @ashb @jedcunningham @gopidesupavan @amoghrajesh
/docker-tests/ @potiuk @ashb @gopidesupavan @jason810496 /kubernetes-tests/ @potiuk @ashb @gopidesupavan @jason810496 /helm-tests/ @dstandish @jedcunningham @@ -109,4 +121,8 @@ ISSUE_TRIAGE_PROCESS.rst @eladkal /providers/fab/src/airflow-core/src/airflow/providers/fab/migrations/ @ephraimbuddy # AIP-72 - Task SDK +# Python SDK /task-sdk/ @ashb @kaxil @amoghrajesh + +# Golang SDK +/go-sdk/ @ashb @kaxil @amoghrajesh diff --git a/.github/ISSUE_TEMPLATE/airflow_bug_report.yml b/.github/ISSUE_TEMPLATE/1-airflow_bug_report.yml similarity index 99% rename from .github/ISSUE_TEMPLATE/airflow_bug_report.yml rename to .github/ISSUE_TEMPLATE/1-airflow_bug_report.yml index bd038baf6346b..862037f29bac9 100644 --- a/.github/ISSUE_TEMPLATE/airflow_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/1-airflow_bug_report.yml @@ -25,7 +25,7 @@ body: the latest release or main to see if the issue is fixed before reporting it. multiple: false options: - - "3.0.1" + - "3.0.2" - "2.11.0" - "main (development)" - "Other Airflow 2 version (please specify below)" diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/2-feature_request.yml similarity index 100% rename from .github/ISSUE_TEMPLATE/feature_request.yml rename to .github/ISSUE_TEMPLATE/2-feature_request.yml diff --git a/.github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml b/.github/ISSUE_TEMPLATE/3-airflow_providers_bug_report.yml similarity index 100% rename from .github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml rename to .github/ISSUE_TEMPLATE/3-airflow_providers_bug_report.yml diff --git a/.github/ISSUE_TEMPLATE/airflow_helmchart_bug_report.yml b/.github/ISSUE_TEMPLATE/4-airflow_helmchart_bug_report.yml similarity index 99% rename from .github/ISSUE_TEMPLATE/airflow_helmchart_bug_report.yml rename to .github/ISSUE_TEMPLATE/4-airflow_helmchart_bug_report.yml index 3c8b7e68a82bf..bb8abfe40a9ee 100644 --- a/.github/ISSUE_TEMPLATE/airflow_helmchart_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4-airflow_helmchart_bug_report.yml @@ -28,7 +28,8 @@ body: What Apache Airflow Helm Chart version are you using? 
multiple: false options: - - "1.16.0 (latest released)" + - "1.17.0 (latest released)" + - "1.16.0" - "1.15.0" - "1.14.0" - "1.13.1" diff --git a/.github/ISSUE_TEMPLATE/airflow_doc_issue_report.yml b/.github/ISSUE_TEMPLATE/5-airflow_doc_issue_report.yml similarity index 100% rename from .github/ISSUE_TEMPLATE/airflow_doc_issue_report.yml rename to .github/ISSUE_TEMPLATE/5-airflow_doc_issue_report.yml diff --git a/.github/ISSUE_TEMPLATE/~free_form.yml b/.github/ISSUE_TEMPLATE/6-free_form.yml similarity index 100% rename from .github/ISSUE_TEMPLATE/~free_form.yml rename to .github/ISSUE_TEMPLATE/6-free_form.yml diff --git a/.github/actions/breeze/action.yml b/.github/actions/breeze/action.yml index 39e87cd7d8b52..40f683c5a6fd4 100644 --- a/.github/actions/breeze/action.yml +++ b/.github/actions/breeze/action.yml @@ -41,6 +41,8 @@ runs: - name: "Install Breeze" shell: bash run: ./scripts/ci/install_breeze.sh + env: + PYTHON_VERSION: "${{ inputs.python-version }}" - name: "Free space" shell: bash run: breeze ci free-space diff --git a/.github/actions/install-pre-commit/action.yml b/.github/actions/install-pre-commit/action.yml index 5289389930c15..66cd6a4962963 100644 --- a/.github/actions/install-pre-commit/action.yml +++ b/.github/actions/install-pre-commit/action.yml @@ -24,13 +24,16 @@ inputs: default: "3.9" uv-version: description: 'uv version to use' - default: "0.7.8" # Keep this comment to allow automatic replacement of uv version + default: "0.7.17" # Keep this comment to allow automatic replacement of uv version pre-commit-version: description: 'pre-commit version to use' default: "4.2.0" # Keep this comment to allow automatic replacement of pre-commit version pre-commit-uv-version: description: 'pre-commit-uv version to use' default: "4.1.4" # Keep this comment to allow automatic replacement of pre-commit-uv version + skip-pre-commits: + description: "Skip some pre-commits from installation" + default: "" runs: using: "composite" steps: @@ -40,6 +43,7 @@ runs: UV_VERSION: ${{inputs.uv-version}} PRE_COMMIT_VERSION: ${{inputs.pre-commit-version}} PRE_COMMIT_UV_VERSION: ${{inputs.pre-commit-uv-version}} + SKIP: ${{ inputs.skip-pre-commits }} run: | pip install uv==${UV_VERSION} || true uv tool install pre-commit==${PRE_COMMIT_VERSION} --with uv==${UV_VERSION} \ @@ -86,3 +90,5 @@ runs: shell: bash run: pre-commit install-hooks || (cat ~/.cache/pre-commit/pre-commit.log && exit 1) working-directory: ${{ github.workspace }} + env: + SKIP: ${{ inputs.skip-pre-commits }} diff --git a/.github/actions/post_tests_success/action.yml b/.github/actions/post_tests_success/action.yml index 865f1e4857cb2..36ee429477733 100644 --- a/.github/actions/post_tests_success/action.yml +++ b/.github/actions/post_tests_success/action.yml @@ -44,7 +44,7 @@ runs: mkdir ./files/coverage-reports mv ./files/coverage*.xml ./files/coverage-reports/ || true - name: "Upload all coverage reports to codecov" - uses: codecov/codecov-action@b9fd7d16f6d7d1b5d2bec1a2887e65ceed900238 # v4 + uses: codecov/codecov-action@b9fd7d16f6d7d1b5d2bec1a2887e65ceed900238 env: CODECOV_TOKEN: ${{ inputs.codecov-token }} if: env.ENABLE_COVERAGE == 'true' && env.TEST_TYPES != 'Helm' && inputs.python-version != '3.12' diff --git a/.github/boring-cyborg.yml b/.github/boring-cyborg.yml index cff760ad9dc38..7cf001621d3c6 100644 --- a/.github/boring-cyborg.yml +++ b/.github/boring-cyborg.yml @@ -108,9 +108,6 @@ labelPRBasedOnFilePath: provider:common-sql: - providers/common/sql/** - provider:standard: - - providers/standard/** - 
provider:databricks: - providers/databricks/** @@ -216,7 +213,7 @@ labelPRBasedOnFilePath: provider:opsgenie: - providers/opsgenie/** - provider:Oracle: + provider:oracle: - providers/oracle/** provider:pagerduty: @@ -276,6 +273,9 @@ labelPRBasedOnFilePath: provider:ssh: - providers/ssh/** + provider:standard: + - providers/standard/** + provider:tableau: - providers/tableau/** @@ -326,6 +326,26 @@ labelPRBasedOnFilePath: - .rat-excludes - .readthedocs.yml + # This should not be enabled on the v3-0-test branch, it adds circular backport when PR open PR created + # from the v3-0-test branch + # This should be copy of the "area:dev-tools" above and should be updated when we switch maintenance branch + # backport-to-v3-0-test: + # - scripts/**/* + # - dev/**/* + # - .github/**/* + # - Dockerfile.ci + # - CONTRIBUTING.rst + # - contributing-docs/**/* + # - yamllint-config.yml + # - .asf.yaml + # - .bash_completion + # - .dockerignore + # - .hadolint.yaml + # - .pre-commit-config.yaml + # - .rat-excludes + # - .readthedocs.yml + + kind:documentation: - airflow-core/docs/**/* - chart/docs/**/* @@ -353,11 +373,16 @@ labelPRBasedOnFilePath: - airflow-core/docs/administration-and-deployment/lineage.rst area:Logging: + - airflow-core/src/airflow/config_templates/airflow_local_settings.py + - airflow-core/tests/unit/core/test_logging_config.py - airflow-core/src/airflow/utils/log/**/* - airflow-core/docs/administration-and-deployment/logging-monitoring/logging-*.rst - airflow-core/tests/unit/utils/log/**/* - providers/**/log/* + area:ConfigTemplates: + - airflow-core/src/airflow/config_templates/* + area:Plugins: - airflow-core/src/airflow/cli/commands/plugins_command.py - airflow-core/src/airflow/plugins_manager.py diff --git a/.github/dependabot.yml b/.github/dependabot.yml index adefbb9f478f7..a0404b811674e 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -33,7 +33,6 @@ updates: - package-ecosystem: npm directories: - - /airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui - /airflow-core/src/airflow/ui schedule: interval: daily @@ -41,6 +40,16 @@ updates: core-ui-package-updates: patterns: - "*" + + - package-ecosystem: npm + directories: + - /airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui + schedule: + interval: daily + groups: + core-ui-package-updates: + patterns: + - "*" - package-ecosystem: npm directories: - /providers/fab/src/airflow/providers/fab/www @@ -51,7 +60,7 @@ updates: patterns: - "*" - # Repeat dependency updates on 2.10 branch as well + # Repeat dependency updates on 2.11 branch as well - package-ecosystem: pip directories: - /clients/python @@ -60,14 +69,14 @@ updates: - / schedule: interval: daily - target-branch: v2-10-test + target-branch: v2-11-test - package-ecosystem: npm directories: - /airflow/www/ schedule: interval: daily - target-branch: v2-10-test + target-branch: v2-11-test groups: core-ui-package-updates: patterns: diff --git a/.github/workflows/additional-ci-image-checks.yml b/.github/workflows/additional-ci-image-checks.yml index 9e7edaee0ee85..d78404dfef5b4 100644 --- a/.github/workflows/additional-ci-image-checks.yml +++ b/.github/workflows/additional-ci-image-checks.yml @@ -135,7 +135,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare and cleanup 
runner" diff --git a/.github/workflows/additional-prod-image-tests.yml b/.github/workflows/additional-prod-image-tests.yml index a2e9deed6df5c..d8e3180f883c9 100644 --- a/.github/workflows/additional-prod-image-tests.yml +++ b/.github/workflows/additional-prod-image-tests.yml @@ -107,7 +107,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 2 persist-credentials: false @@ -144,7 +144,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 2 persist-credentials: false diff --git a/.github/workflows/airflow-distributions-tests.yml b/.github/workflows/airflow-distributions-tests.yml index a7156ef98dfb7..62d4ee1f6b420 100644 --- a/.github/workflows/airflow-distributions-tests.yml +++ b/.github/workflows/airflow-distributions-tests.yml @@ -57,11 +57,20 @@ on: # yamllint disable-line rule:truthy description: "Whether this is a canary run (true/false)" required: true type: string + use-local-venv: + description: "Whether local venv should be used for tests (true/false)" + required: true + type: string + test-timeout: + required: false + type: number + default: 60 + permissions: contents: read jobs: distributions-tests: - timeout-minutes: 80 + timeout-minutes: ${{ fromJSON(inputs.test-timeout) }} name: ${{ inputs.distribution-name }}:P${{ matrix.python-version }} tests runs-on: ${{ fromJSON(inputs.runners) }} strategy: @@ -80,7 +89,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare breeze & CI image: ${{ matrix.python-version }}" @@ -89,18 +98,34 @@ jobs: platform: ${{ inputs.platform }} python: ${{ matrix.python-version }} use-uv: ${{ inputs.use-uv }} + if: ${{ inputs.use-local-venv != 'true' }} + - name: "Prepare and cleanup runner" + run: ./scripts/ci/prepare_and_cleanup_runner.sh + shell: bash + if: ${{ inputs.use-local-venv == 'true' }} + - name: "Install Breeze" + uses: ./.github/actions/breeze + with: + use-uv: ${{ inputs.use-uv }} + if: ${{ inputs.use-local-venv == 'true' }} - name: "Cleanup dist files" run: rm -fv ./dist/* + if: ${{ matrix.python-version == inputs.default-python-version }} # Conditional steps based on the distribution name - name: "Prepare Airflow ${{inputs.distribution-name}}: wheel" env: DISTRIBUTION_TYPE: "${{ inputs.distribution-cmd-format }}" - run: > + USE_LOCAL_HATCH: "${{ inputs.use-local-venv }}" + run: | + uv tool uninstall hatch || true + uv tool install hatch==1.14.1 breeze release-management "${DISTRIBUTION_TYPE}" --distribution-format wheel + if: ${{ matrix.python-version == inputs.default-python-version }} - name: "Verify wheel packages with twine" run: | uv tool uninstall twine || true uv tool install twine && twine check dist/*.whl + if: ${{ matrix.python-version == inputs.default-python-version }} - name: > Run unit tests for Airflow ${{inputs.distribution-name}}:Python ${{ matrix.python-version }} env: diff --git 
a/.github/workflows/automatic-backport.yml b/.github/workflows/automatic-backport.yml index 4c72401a5d317..4f861ddd58118 100644 --- a/.github/workflows/automatic-backport.yml +++ b/.github/workflows/automatic-backport.yml @@ -37,7 +37,7 @@ jobs: - name: Find PR information id: pr-info - uses: actions/github-script@v7 + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: diff --git a/.github/workflows/backport-cli.yml b/.github/workflows/backport-cli.yml index 673607027496d..0ecdfb8e63e04 100644 --- a/.github/workflows/backport-cli.yml +++ b/.github/workflows/backport-cli.yml @@ -53,7 +53,7 @@ jobs: steps: - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" id: checkout-for-backport - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: true fetch-depth: 0 diff --git a/.github/workflows/basic-tests.yml b/.github/workflows/basic-tests.yml index 52983ad65d559..161ba36d58043 100644 --- a/.github/workflows/basic-tests.yml +++ b/.github/workflows/basic-tests.yml @@ -71,7 +71,7 @@ jobs: - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - - uses: actions/checkout@v4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: # Need to fetch all history for selective checks tests fetch-depth: 0 @@ -94,7 +94,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare and cleanup runner" @@ -105,7 +105,7 @@ jobs: version: 9 run_install: false - name: "Setup node" - uses: actions/setup-node@v4 + uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0 with: node-version: 21 cache: 'pnpm' @@ -158,12 +158,13 @@ jobs: runs-on: ${{ fromJSON(inputs.runners) }} env: PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" + SKIP: ${{ inputs.skip-pre-commits }} steps: - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Install Breeze" @@ -176,6 +177,7 @@ jobs: id: pre-commit with: python-version: ${{steps.breeze.outputs.host-python-version}} + skip-pre-commits: ${{ inputs.skip-pre-commits }} # Those checks are run if no image needs to be built for checks. 
This is for simple changes that # Do not touch any of the python code or any of the important files that might require building @@ -191,7 +193,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare and cleanup runner" @@ -205,9 +207,10 @@ jobs: uses: ./.github/actions/install-pre-commit id: pre-commit with: - python-version: ${{steps.breeze.outputs.host-python-version}} + python-version: ${{ steps.breeze.outputs.host-python-version }} + skip-pre-commits: ${{ inputs.skip-pre-commits }} - name: Fetch incoming commit ${{ github.sha }} with its parent - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: ref: ${{ github.sha }} fetch-depth: 2 @@ -220,7 +223,7 @@ jobs: VERBOSE: "false" SKIP_BREEZE_PRE_COMMITS: "true" SKIP: ${{ inputs.skip-pre-commits }} - COLUMNS: "250" + COLUMNS: "202" test-git-clone-on-windows: timeout-minutes: 5 @@ -228,7 +231,7 @@ jobs: runs-on: ["windows-latest"] steps: - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 2 persist-credentials: false @@ -246,7 +249,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare and cleanup runner" @@ -261,6 +264,7 @@ jobs: id: pre-commit with: python-version: ${{steps.breeze.outputs.host-python-version}} + skip-pre-commits: ${{ inputs.skip-pre-commits }} - name: "Autoupdate all pre-commits" run: pre-commit autoupdate - name: "Run automated upgrade for black" @@ -319,7 +323,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare and cleanup runner" @@ -359,4 +363,5 @@ jobs: run: git fetch --tags >/dev/null 2>&1 || true - name: "Test airflow core issue generation automatically" run: | - breeze release-management generate-issue-content-core --limit-pr-count 25 --latest --verbose + breeze release-management generate-issue-content-core \ + --limit-pr-count 2 --previous-release 3.0.1 --current-release 3.0.2 --verbose diff --git a/.github/workflows/ci-amd.yml b/.github/workflows/ci-amd.yml index e67351d64108f..252d5cfe78b60 100644 --- a/.github/workflows/ci-amd.yml +++ b/.github/workflows/ci-amd.yml @@ -113,6 +113,7 @@ jobs: run-coverage: ${{ steps.source-run-info.outputs.run-coverage }} run-kubernetes-tests: ${{ steps.selective-checks.outputs.run-kubernetes-tests }} run-task-sdk-tests: ${{ steps.selective-checks.outputs.run-task-sdk-tests }} + run-go-sdk-tests: ${{ steps.selective-checks.outputs.run-go-sdk-tests }} run-system-tests: ${{ steps.selective-checks.outputs.run-system-tests }} run-tests: ${{ steps.selective-checks.outputs.run-tests }} run-ui-tests: ${{ steps.selective-checks.outputs.run-ui-tests }} @@ -134,13 +135,13 @@ jobs: shell: bash run: docker run -v 
"${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare and cleanup runner" run: ./scripts/ci/prepare_and_cleanup_runner.sh - name: Fetch incoming commit ${{ github.sha }} with its parent - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: ref: ${{ github.sha }} fetch-depth: 2 @@ -168,6 +169,37 @@ jobs: PR_LABELS: ${{ steps.source-run-info.outputs.pr-labels }} GITHUB_CONTEXT: ${{ toJson(github) }} + run-pin-versions-pre-commit: + name: "Run pin-versions pre-commit" + needs: [build-info] + runs-on: ${{ fromJSON(needs.build-info.outputs.amd-runners) }} + steps: + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + persist-credentials: false + - name: "Install Python 3.11 as 3.11+ is needed by pin-versions pre-commit" + uses: actions/setup-python@7f4fc3e22c37d6ff65e88745f38bd3157c663f7c # v4.9.1 + with: + python-version: 3.11 + cache: "pip" + - name: Install pre-commit, uv, and pre-commit-uv + shell: bash + env: + UV_VERSION: "0.7.17" # Keep this comment to allow automatic replacement of uv version + PRE_COMMIT_VERSION: "4.2.0" # Keep this comment to allow automatic replacement of pre-commit version + PRE_COMMIT_UV_VERSION: "4.1.4" # Keep this comment to allow automatic replacement of pre-commit-uv version + run: | + pip install uv==${UV_VERSION} || true + uv tool install pre-commit==${PRE_COMMIT_VERSION} --with uv==${UV_VERSION} \ + --with pre-commit-uv==${PRE_COMMIT_UV_VERSION} + - name: "Run pin-versions pre-commit" + run: > + pre-commit run -c .github/.pre-commit-config.yaml --all-files --verbose --hook-stage manual + pin-versions + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + basic-tests: name: "Basic tests" needs: [build-info] @@ -216,7 +248,6 @@ jobs: contents: read packages: write id-token: write - if: needs.build-info.outputs.canary-run == 'true' with: runners: ${{ needs.build-info.outputs.amd-runners }} platform: "linux/amd64" @@ -243,6 +274,8 @@ jobs: runners: ${{ needs.build-info.outputs.amd-runners }} platform: "linux/amd64" python-versions-list-as-string: ${{ needs.build-info.outputs.python-versions-list-as-string }} + python-versions: ${{ needs.build-info.outputs.python-versions }} + generate-pypi-constraints: "true" # generate no providers constraints only in canary builds - they take quite some time to generate # they are not needed for regular builds, they are only needed to update constraints in canaries generate-no-providers-constraints: ${{ needs.build-info.outputs.canary-run }} @@ -751,32 +784,72 @@ jobs: distribution-name: "task-sdk" distribution-cmd-format: "prepare-task-sdk-distributions" test-type: "task-sdk-tests" + use-local-venv: 'false' + test-timeout: 20 if: > ( needs.build-info.outputs.run-task-sdk-tests == 'true' || needs.build-info.outputs.run-tests == 'true' && needs.build-info.outputs.only-new-ui-files != 'true') - tests-airflow-ctl: - name: "Airflow CTL tests" - uses: ./.github/workflows/airflow-distributions-tests.yml - needs: [build-info, build-ci-images] - permissions: - contents: read - packages: read - with: - runners: ${{ needs.build-info.outputs.amd-runners }} - platform: "linux/amd64" - default-python-version: ${{ needs.build-info.outputs.default-python-version }} - 
python-versions: ${{ needs.build-info.outputs.python-versions }} - use-uv: ${{ needs.build-info.outputs.use-uv }} - canary-run: ${{ needs.build-info.outputs.canary-run }} - distribution-name: "airflow-ctl" - distribution-cmd-format: "prepare-airflow-ctl-distributions" - test-type: "airflow-ctl-tests" - if: > - ( needs.build-info.outputs.run-airflow-ctl-tests == 'true' || - needs.build-info.outputs.run-tests == 'true' && - needs.build-info.outputs.only-new-ui-files != 'true') +# tests-go-sdk: +# name: "Go SDK tests" +# needs: [build-info, build-ci-images] +# runs-on: ${{ fromJSON(needs.build-info.outputs.amd-runners) }} +# timeout-minutes: 15 +# permissions: +# contents: read +# packages: read +# if: > +# ( needs.build-info.outputs.run-go-sdk-tests == 'true' || +# needs.build-info.outputs.run-tests == 'true' && +# needs.build-info.outputs.only-new-ui-files != 'true') +# env: +# GITHUB_REPOSITORY: ${{ github.repository }} +# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} +# GITHUB_USERNAME: ${{ github.actor }} +# VERBOSE: "true" +# steps: +# - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" +# uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 +# with: +# persist-credentials: false +# +# # keep this in sync with go.mod in go-sdk/ +# - name: Setup Go +# uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0 +# with: +# go-version: 1.24 +# cache-dependency-path: go-sdk/go.sum +# +# - name: "Cleanup dist files" +# run: rm -fv ./dist/* +# +# - name: Run Go tests +# working-directory: ./go-sdk +# run: go test -v ./... +# +# tests-airflow-ctl: +# name: "Airflow CTL tests" +# uses: ./.github/workflows/airflow-distributions-tests.yml +# needs: [build-info] +# permissions: +# contents: read +# packages: read +# with: +# runners: ${{ needs.build-info.outputs.amd-runners }} +# platform: "linux/amd64" +# default-python-version: ${{ needs.build-info.outputs.default-python-version }} +# python-versions: ${{ needs.build-info.outputs.python-versions }} +# use-uv: ${{ needs.build-info.outputs.use-uv }} +# canary-run: ${{ needs.build-info.outputs.canary-run }} +# distribution-name: "airflow-ctl" +# distribution-cmd-format: "prepare-airflow-ctl-distributions" +# test-type: "airflow-ctl-tests" +# use-local-venv: 'true' +# if: > +# ( needs.build-info.outputs.run-airflow-ctl-tests == 'true' || +# needs.build-info.outputs.run-tests == 'true' && +# needs.build-info.outputs.only-new-ui-files != 'true') finalize-tests: name: Finalize tests @@ -807,7 +880,7 @@ jobs: - tests-sqlite-core - tests-sqlite-providers - tests-task-sdk - - tests-airflow-ctl + # - tests-airflow-ctl - tests-with-lowest-direct-resolution-core - tests-with-lowest-direct-resolution-providers uses: ./.github/workflows/finalize-tests.yml @@ -829,6 +902,8 @@ jobs: notify-slack-failure: name: "Notify Slack on Failure" + needs: + - finalize-tests if: github.event_name == 'schedule' && failure() && github.run_attempt == 1 runs-on: ["ubuntu-22.04"] steps: @@ -846,7 +921,7 @@ jobs: - type: "section" text: type: "mrkdwn" - text: "🚨🕒 Scheduled CI Failure Alert (AMD) on branch *${{ github.ref_name }}* 🕒🚨\n\n*Details:* " + text: "🚨🕒 Scheduled CI Failure Alert (AMD) 🕒🚨\n\n*Details:* " # yamllint enable rule:line-length summarize-warnings: @@ -859,7 +934,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 
v4.2.2 with: persist-credentials: false - name: "Prepare and cleanup runner" @@ -868,12 +943,12 @@ jobs: shell: bash run: ./scripts/tools/free_up_disk_space.sh - name: "Download all test warning artifacts from the current build" - uses: actions/download-artifact@v4 + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 with: path: ./artifacts pattern: test-warnings-* - name: "Setup python" - uses: actions/setup-python@v5 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 with: python-version: ${{ inputs.default-python-version }} - name: "Summarize all warnings" @@ -882,7 +957,7 @@ jobs: --pattern "**/warnings-*.txt" \ --output ./files - name: "Upload artifact for summarized warnings" - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: test-summarized-amd-runner-warnings path: ./files/warn-summary-*.txt diff --git a/.github/workflows/ci-arm.yml b/.github/workflows/ci-arm.yml index c1b39b7d3dfc0..2fb75cf2e9784 100644 --- a/.github/workflows/ci-arm.yml +++ b/.github/workflows/ci-arm.yml @@ -106,6 +106,7 @@ jobs: run-coverage: ${{ steps.source-run-info.outputs.run-coverage }} run-kubernetes-tests: ${{ steps.selective-checks.outputs.run-kubernetes-tests }} run-task-sdk-tests: ${{ steps.selective-checks.outputs.run-task-sdk-tests }} + run-go-sdk-tests: ${{ steps.selective-checks.outputs.run-go-sdk-tests }} run-system-tests: ${{ steps.selective-checks.outputs.run-system-tests }} run-tests: ${{ steps.selective-checks.outputs.run-tests }} run-ui-tests: ${{ steps.selective-checks.outputs.run-ui-tests }} @@ -127,13 +128,13 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare and cleanup runner" run: ./scripts/ci/prepare_and_cleanup_runner.sh - name: Fetch incoming commit ${{ github.sha }} with its parent - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: ref: ${{ github.sha }} fetch-depth: 2 @@ -209,7 +210,6 @@ jobs: contents: read packages: write id-token: write - if: needs.build-info.outputs.canary-run == 'true' with: runners: ${{ needs.build-info.outputs.arm-runners }} platform: "linux/arm64" @@ -236,13 +236,14 @@ jobs: runners: ${{ needs.build-info.outputs.arm-runners }} platform: "linux/arm64" python-versions-list-as-string: ${{ needs.build-info.outputs.python-versions-list-as-string }} + python-versions: ${{ needs.build-info.outputs.python-versions }} + generate-pypi-constraints: "true" # generate no providers constraints only in canary builds - they take quite some time to generate # they are not needed for regular builds, they are only needed to update constraints in canaries generate-no-providers-constraints: ${{ needs.build-info.outputs.canary-run }} debug-resources: ${{ needs.build-info.outputs.debug-resources }} use-uv: ${{ needs.build-info.outputs.use-uv }} - providers: name: "provider distributions tests" uses: ./.github/workflows/test-providers.yml @@ -507,6 +508,43 @@ jobs: ( needs.build-info.outputs.run-kubernetes-tests == 'true' || needs.build-info.outputs.needs-helm-tests == 'true') +# tests-go-sdk: +# name: "Go SDK tests" +# needs: [build-info, build-ci-images] +# runs-on: ${{ fromJSON(needs.build-info.outputs.arm-runners) }} +# 
timeout-minutes: 15 +# permissions: +# contents: read +# packages: read +# if: > +# ( needs.build-info.outputs.run-go-sdk-tests == 'true' || +# needs.build-info.outputs.run-tests == 'true' && +# needs.build-info.outputs.only-new-ui-files != 'true') +# env: +# GITHUB_REPOSITORY: ${{ github.repository }} +# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} +# GITHUB_USERNAME: ${{ github.actor }} +# VERBOSE: "true" +# steps: +# - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" +# uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 +# with: +# persist-credentials: false +# +# # keep this in sync with go.mod in go-sdk/ +# - name: Setup Go +# uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0 +# with: +# go-version: 1.24 +# cache-dependency-path: go-sdk/go.sum +# +# - name: "Cleanup dist files" +# run: rm -fv ./dist/* +# +# - name: Run Go tests +# working-directory: ./go-sdk +# run: go test -v ./... + finalize-tests: name: Finalize tests permissions: @@ -549,6 +587,8 @@ jobs: notify-slack-failure: name: "Notify Slack on Failure" + needs: + - finalize-tests if: github.event_name == 'schedule' && failure() && github.run_attempt == 1 runs-on: ["ubuntu-22.04"] steps: @@ -566,5 +606,5 @@ jobs: - type: "section" text: type: "mrkdwn" - text: "🚨🕒 Scheduled CI Failure Alert (AMD) on branch *${{ github.ref_name }}* 🕒🚨\n\n*Details:* " + text: "🚨🕒 Scheduled CI Failure Alert (ARM) 🕒🚨\n\n*Details:* " # yamllint enable rule:line-length diff --git a/.github/workflows/ci-image-build.yml b/.github/workflows/ci-image-build.yml index b52fdb906fa2e..f58fe1946ca1f 100644 --- a/.github/workflows/ci-image-build.yml +++ b/.github/workflows/ci-image-build.yml @@ -117,7 +117,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout target branch" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare and cleanup runner" diff --git a/.github/workflows/ci-image-checks.yml b/.github/workflows/ci-image-checks.yml index a8535acfccbd5..a3baab28f29bc 100644 --- a/.github/workflows/ci-image-checks.yml +++ b/.github/workflows/ci-image-checks.yml @@ -127,7 +127,7 @@ jobs: run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" if: inputs.canary-run == 'true' - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false if: inputs.canary-run == 'true' @@ -173,7 +173,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" @@ -193,7 +193,7 @@ jobs: env: VERBOSE: "false" SKIP: ${{ inputs.skip-pre-commits }} - COLUMNS: "250" + COLUMNS: "202" SKIP_GROUP_OUTPUT: "true" DEFAULT_BRANCH: ${{ inputs.branch }} RUFF_FORMAT: "github" @@ -216,7 +216,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - 
name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" @@ -235,7 +235,7 @@ jobs: run: pre-commit run --color always --verbose --hook-stage manual "$MYPY_CHECK" --all-files env: VERBOSE: "false" - COLUMNS: "250" + COLUMNS: "202" SKIP_GROUP_OUTPUT: "true" DEFAULT_BRANCH: ${{ inputs.branch }} RUFF_FORMAT: "github" @@ -264,7 +264,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" @@ -277,26 +277,25 @@ jobs: uses: apache/infrastructure-actions/stash/restore@1c35b5ccf8fba5d4c3fdf25a045ca91aa0cbc468 with: path: ./generated/_inventory_cache/ - # TODO(potiuk): do better with determining the key - key: cache-docs-inventory-v1-${{ hashFiles('pyproject.toml') }} + key: cache-docs-inventory-v1-${{ hashFiles('**/pyproject.toml') }} id: restore-docs-inventory-cache - name: "Building docs with ${{ matrix.flag }} flag" env: DOCS_LIST_AS_STRING: ${{ inputs.docs-list-as-string }} run: > - breeze build-docs ${DOCS_LIST_AS_STRING} ${{ matrix.flag }} + breeze build-docs ${DOCS_LIST_AS_STRING} ${{ matrix.flag }} --refresh-airflow-inventories - name: "Save docs inventory cache" uses: apache/infrastructure-actions/stash/save@1c35b5ccf8fba5d4c3fdf25a045ca91aa0cbc468 with: path: ./generated/_inventory_cache/ - key: cache-docs-inventory-v1-${{ hashFiles('pyproject.toml') }} + key: cache-docs-inventory-v1-${{ hashFiles('**/pyproject.toml') }} if-no-files-found: 'error' retention-days: '2' # If we upload from multiple matrix jobs we could end up with a race condition. so just pick one job # to be responsible for updating it. 
https://github.com/actions/upload-artifact/issues/506 if: steps.restore-docs-inventory-cache != 'true' && matrix.flag == '--docs-only' - name: "Upload build docs" - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: airflow-docs path: './generated/_build' @@ -328,7 +327,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" @@ -338,7 +337,7 @@ jobs: python: ${{ inputs.default-python-version }} use-uv: ${{ inputs.use-uv }} - name: "Download docs prepared as artifacts" - uses: actions/download-artifact@v4 + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 with: name: airflow-docs path: './generated/_build' @@ -403,12 +402,12 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 2 persist-credentials: false - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: "apache/airflow-client-python" fetch-depth: 1 diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 1fcf81a84fd5b..28c8cfae81a07 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -47,17 +47,17 @@ jobs: security-events: write steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: Initialize CodeQL - uses: github/codeql-action/init@v3 + uses: github/codeql-action/init@ce28f5bb42b7a9f2c824e633a3f6ee835bab6858 # v3.29.0 with: languages: ${{ matrix.language }} - name: Autobuild - uses: github/codeql-action/autobuild@v3 + uses: github/codeql-action/autobuild@ce28f5bb42b7a9f2c824e633a3f6ee835bab6858 # v3.29.0 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v3 + uses: github/codeql-action/analyze@ce28f5bb42b7a9f2c824e633a3f6ee835bab6858 # v3.29.0 diff --git a/.github/workflows/finalize-tests.yml b/.github/workflows/finalize-tests.yml index 6afcbd0812219..2535d29c5f063 100644 --- a/.github/workflows/finalize-tests.yml +++ b/.github/workflows/finalize-tests.yml @@ -99,7 +99,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: # Needed to perform push action persist-credentials: false @@ -109,16 +109,16 @@ jobs: id: constraints-branch run: ./scripts/ci/constraints/ci_branch_constraints.sh >> ${GITHUB_OUTPUT} - name: Checkout ${{ steps.constraints-branch.outputs.branch }} - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: path: "constraints" ref: ${{ steps.constraints-branch.outputs.branch }} persist-credentials: true fetch-depth: 0 - name: "Download 
constraints from the constraints generated by build CI image" - uses: actions/download-artifact@v4 + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 with: - name: constraints + pattern: constraints-* path: ./files - name: "Diff in constraints for Python: ${{ inputs.python-versions-list-as-string }}" run: ./scripts/ci/constraints/ci_diff_constraints.sh @@ -132,6 +132,42 @@ jobs: run: git push + dependency-upgrade-summary: + runs-on: ${{ fromJSON(inputs.runners) }} + needs: [update-constraints] + if: inputs.upgrade-to-newer-dependencies == 'true' && inputs.platform == 'linux/amd64' + name: "Dependencies ${{ matrix.python-version }}:${{ matrix.constraints-mode }}" + strategy: + matrix: + python-version: ${{ fromJson(inputs.python-versions) }} + constraints-mode: ["constraints", "constraints-source-providers", "constraints-no-providers"] + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - name: "Cleanup repo" + shell: bash + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + persist-credentials: false + - name: "Prepare breeze & CI image: ${{ matrix.python-version }}" + uses: ./.github/actions/prepare_breeze_and_image + with: + platform: ${{ inputs.platform }} + python: ${{ matrix.python-version }} + use-uv: ${{ inputs.use-uv }} + - name: "Dependency upgrade summary" + shell: bash + run: > + breeze release-management constraints-version-check + --python "${MATRIX_PYTHON_VERSION}" + --airflow-constraints-mode "${MATRIX_CONSTRAINTS_MODE}" --explain-why + env: + MATRIX_PYTHON_VERSION: "${{ matrix.python-version }}" + MATRIX_CONSTRAINTS_MODE: "${{ matrix.constraints-mode }}" + VERBOSE: "false" + push-buildx-cache-to-github-registry: name: Push Regular Image Cache ${{ inputs.platform }} needs: [update-constraints] diff --git a/.github/workflows/generate-constraints.yml b/.github/workflows/generate-constraints.yml index 35b31db98e575..d2d0db0685a58 100644 --- a/.github/workflows/generate-constraints.yml +++ b/.github/workflows/generate-constraints.yml @@ -32,10 +32,18 @@ on: # yamllint disable-line rule:truthy description: "Stringified array of all Python versions to test - separated by spaces." 
required: true type: string + python-versions: + description: "JSON-formatted array of Python versions to generate constraints for" + required: true + type: string generate-no-providers-constraints: description: "Whether to generate constraints without providers (true/false)" required: true type: string + generate-pypi-constraints: + description: "Whether to generate PyPI constraints (true/false)" + required: true + type: string debug-resources: description: "Whether to run in debug mode (true/false)" required: true @@ -45,94 +53,86 @@ on: # yamllint disable-line rule:truthy required: true type: string jobs: - generate-constraints: + generate-constraints-matrix: permissions: contents: read timeout-minutes: 70 - name: Generate constraints ${{ inputs.python-versions-list-as-string }} + name: Generate constraints for ${{ matrix.python-version }} on ${{ inputs.platform }} runs-on: ${{ fromJSON(inputs.runners) }} + strategy: + matrix: + python-version: ${{ fromJson(inputs.python-versions) }} env: DEBUG_RESOURCES: ${{ inputs.debug-resources }} GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} INCLUDE_SUCCESS_OUTPUTS: "true" - PYTHON_VERSIONS: ${{ inputs.python-versions-list-as-string }} + PYTHON_VERSION: ${{ matrix.python-version }} VERBOSE: "true" steps: - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - - name: "Prepare and cleanup runner" - run: ./scripts/ci/prepare_and_cleanup_runner.sh - shell: bash - - name: "Install Breeze" - uses: ./.github/actions/breeze - with: - use-uv: ${{ inputs.use-uv }} - id: breeze - - name: "Prepare all CI images: ${{ inputs.python-versions-list-as-string}}" - uses: ./.github/actions/prepare_all_ci_images + - name: "Prepare breeze & CI image: ${{ matrix.python-version }}" + uses: ./.github/actions/prepare_breeze_and_image with: platform: ${{ inputs.platform }} - python-versions-list-as-string: ${{ inputs.python-versions-list-as-string }} - docker-volume-location: "" # TODO(jscheffl): Understand why it fails here and fix it - - name: "Verify all CI images ${{ inputs.python-versions-list-as-string }}" - run: breeze ci-image verify --run-in-parallel + python: ${{ matrix.python-version }} + use-uv: ${{ inputs.use-uv }} - name: "Source constraints" shell: bash run: > - breeze release-management generate-constraints --run-in-parallel + breeze release-management generate-constraints --airflow-constraints-mode constraints-source-providers --answer yes + --python "${PYTHON_VERSION}" - name: "No providers constraints" shell: bash timeout-minutes: 25 run: > - breeze release-management generate-constraints --run-in-parallel - --airflow-constraints-mode constraints-no-providers --answer yes --parallelism 3 - # The no providers constraints are only needed when we want to update constraints (in canary builds) - # They slow down the start of PROD image builds so we want to only run them when needed. 
+ breeze release-management generate-constraints + --airflow-constraints-mode constraints-no-providers --answer yes + --python "${PYTHON_VERSION}" if: inputs.generate-no-providers-constraints == 'true' - name: "Prepare updated provider distributions" - # In case of provider distributions which are not yet released, we build them from sources shell: bash run: > - breeze release-management prepare-provider-distributions --include-not-ready-providers - --distribution-format wheel + breeze release-management prepare-provider-distributions + --include-not-ready-providers --distribution-format wheel + if: inputs.generate-pypi-constraints == 'true' - name: "Prepare airflow distributions" shell: bash run: > breeze release-management prepare-airflow-distributions --distribution-format wheel + if: inputs.generate-pypi-constraints == 'true' - name: "Prepare task-sdk distribution" shell: bash run: > breeze release-management prepare-task-sdk-distributions --distribution-format wheel + if: inputs.generate-pypi-constraints == 'true' - name: "PyPI constraints" shell: bash timeout-minutes: 25 run: | - for PYTHON in $PYTHON_VERSIONS; do - breeze release-management generate-constraints --airflow-constraints-mode constraints \ - --answer yes --python "${PYTHON}" - done - - name: "Dependency upgrade summary" - shell: bash - env: - PYTHON_VERSIONS: ${{ env.PYTHON_VERSIONS }} - run: | - for PYTHON_VERSION in $PYTHON_VERSIONS; do - echo "Summarizing Python $PYTHON_VERSION" - cat "files/constraints-${PYTHON_VERSION}"/*.md >> $GITHUB_STEP_SUMMARY || true - df -H - done + breeze release-management generate-constraints --airflow-constraints-mode constraints \ + --answer yes --python "${PYTHON_VERSION}" + if: inputs.generate-pypi-constraints == 'true' - name: "Upload constraint artifacts" - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: - name: constraints - path: ./files/constraints-*/constraints-*.txt + name: constraints-${{ matrix.python-version }} + path: ./files/constraints-${{ matrix.python-version }}/constraints-*.txt retention-days: 7 if-no-files-found: error + - name: "Dependency upgrade summary" + shell: bash + env: + PYTHON_VERSION: ${{ matrix.python-version }} + run: | + echo "Summarizing Python $PYTHON_VERSION" + cat "files/constraints-${PYTHON_VERSION}"/*.md >> $GITHUB_STEP_SUMMARY || true + df -H diff --git a/.github/workflows/helm-tests.yml b/.github/workflows/helm-tests.yml index 26bf28cce1beb..e889e25160cea 100644 --- a/.github/workflows/helm-tests.yml +++ b/.github/workflows/helm-tests.yml @@ -68,7 +68,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" @@ -93,7 +93,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare and cleanup runner" @@ -130,13 +130,12 @@ jobs: SIGN_WITH: dev@airflow.apache.org - name: "Fetch Git Tags" run: git fetch --tags - - name: "Test helm chart issue generation automatically" - # Adding same tags for now, will 
address in a follow-up + - name: "Test helm chart issue generation" run: > - breeze release-management generate-issue-content-helm-chart --limit-pr-count 10 - --latest --verbose + breeze release-management generate-issue-content-helm-chart --limit-pr-count 2 + --previous-release helm-chart/1.15.0 --current-release helm-chart/1.16.0 --verbose - name: "Upload Helm artifacts" - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: Helm artifacts path: ./dist/airflow-* diff --git a/.github/workflows/integration-system-tests.yml b/.github/workflows/integration-system-tests.yml index bf75cc87f31a3..6619bedd7c65e 100644 --- a/.github/workflows/integration-system-tests.yml +++ b/.github/workflows/integration-system-tests.yml @@ -97,7 +97,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" @@ -146,7 +146,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" @@ -190,7 +190,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" diff --git a/.github/workflows/k8s-tests.yml b/.github/workflows/k8s-tests.yml index 6ed0c79d187c5..37aa3aa703ce1 100644 --- a/.github/workflows/k8s-tests.yml +++ b/.github/workflows/k8s-tests.yml @@ -80,7 +80,7 @@ jobs: echo "PYTHON_MAJOR_MINOR_VERSION=${KUBERNETES_COMBO}" | sed 's/-.*//' >> $GITHUB_ENV echo "KUBERNETES_VERSION=${KUBERNETES_COMBO}" | sed 's/=[^-]*-/=/' >> $GITHUB_ENV - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false # env.PYTHON_MAJOR_MINOR_VERSION, env.KUBERNETES_VERSION are set in the previous @@ -115,7 +115,7 @@ jobs: - name: "\ Upload KinD logs ${{ matrix.executor }}-${{ matrix.kubernetes-combo }}-\ ${{ matrix.use-standard-naming }}" - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: "\ kind-logs-${{ matrix.kubernetes-combo }}-${{ matrix.executor }}-\ diff --git a/.github/workflows/news-fragment.yml b/.github/workflows/news-fragment.yml index f6f68d1288a35..04e308c306138 100644 --- a/.github/workflows/news-fragment.yml +++ b/.github/workflows/news-fragment.yml @@ -30,7 +30,7 @@ jobs: if: "contains(github.event.pull_request.labels.*.name, 'airflow3.0:breaking')" steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false # `towncrier check` runs `git diff --name-only origin/main...`, which diff --git 
a/.github/workflows/prod-image-build.yml b/.github/workflows/prod-image-build.yml index c5cf5cd5cef96..4819ee7be1470 100644 --- a/.github/workflows/prod-image-build.yml +++ b/.github/workflows/prod-image-build.yml @@ -124,7 +124,7 @@ jobs: run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" if: inputs.upload-package-artifact == 'true' - name: "Checkout target branch" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare and cleanup runner" @@ -171,7 +171,7 @@ jobs: breeze release-management prepare-airflow-ctl-distributions --distribution-format wheel if: inputs.upload-package-artifact == 'true' - name: "Upload prepared packages as artifacts" - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: prod-packages path: ./dist @@ -211,7 +211,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout target branch" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare and cleanup runner" @@ -224,16 +224,16 @@ jobs: shell: bash run: rm -fv ./dist/* ./docker-context-files/* - name: "Download packages prepared as artifacts" - uses: actions/download-artifact@v4 + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 with: name: prod-packages path: ./docker-context-files - name: "Show downloaded packages" run: ls -la ./docker-context-files - name: "Download constraints" - uses: actions/download-artifact@v4 + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 with: - name: constraints + pattern: constraints-* path: ./docker-context-files - name: "Show constraints" run: | diff --git a/.github/workflows/publish-docs-to-s3.yml b/.github/workflows/publish-docs-to-s3.yml index 748a466cf8ce6..6139d33182e50 100644 --- a/.github/workflows/publish-docs-to-s3.yml +++ b/.github/workflows/publish-docs-to-s3.yml @@ -24,31 +24,30 @@ on: # yamllint disable-line rule:truthy description: "The branch or tag to checkout for the docs publishing" required: true type: string - include-docs: - description: | - Space separated list of docs to build. 
- To publish docs for nested provider packages, provide the package name with `.` - eg: amazon common.messaging apache.kafka - + destination: + description: "The destination location in S3" required: false - default: "all-providers" + default: auto + type: choice + options: + - auto + - live + - staging + include-docs: + description: "Space separated list of packages to build" + required: true type: string exclude-docs: description: "Comma separated list of docs to exclude" required: false default: "no-docs-excluded" type: string - destination-location: - description: "The destination location in S3, default is live site" + skip-write-to-stable-folder: + description: "Do not override stable version" required: false - default: "s3://live-docs-airflow-apache-org/docs" - type: choice - options: - - s3://live-docs-airflow-apache-org/docs - - s3://staging-docs-airflow-apache-org/docs + default: false + type: boolean -env: - AIRFLOW_ROOT_PATH: "/home/runner/work/temp-airflow-repo-reference" # checkout dir for referenced tag permissions: contents: read jobs: @@ -62,9 +61,15 @@ jobs: REF: ${{ inputs.ref }} INCLUDE_DOCS: ${{ inputs.include-docs }} EXCLUDE_DOCS: ${{ inputs.exclude-docs }} - DESTINATION_LOCATION: ${{ inputs.destination-location }} + DESTINATION: ${{ inputs.destination }} + SKIP_WRITE_TO_STABLE_FOLDER: ${{ inputs.skip-write-to-stable-folder }} outputs: include-docs: ${{ inputs.include-docs == 'all' && '' || inputs.include-docs }} + destination-location: ${{ steps.parameters.outputs.destination-location }} + destination: ${{ steps.parameters.outputs.destination }} + extra-build-options: ${{ steps.parameters.outputs.extra-build-options }} + # yamllint disable rule:line-length + skip-write-to-stable-folder: ${{ inputs.skip-write-to-stable-folder && '--skip-write-to-stable-folder' || '' }} if: contains(fromJSON('[ "ashb", "eladkal", @@ -78,39 +83,33 @@ jobs: steps: - name: "Input parameters summary" shell: bash + id: parameters run: | echo "Input parameters summary" echo "=========================" echo "Ref: '${REF}'" echo "Included docs : '${INCLUDE_DOCS}'" echo "Exclude docs: '${EXCLUDE_DOCS}'" - echo "Destination location: '${DESTINATION_LOCATION}'" - - build-ci-images: - name: Build CI images - uses: ./.github/workflows/ci-image-build.yml - needs: [build-info] - permissions: - contents: read - # This write is only given here for `push` events from "apache/airflow" repo. It is not given for PRs - # from forks. This is to prevent malicious PRs from creating images in the "apache/airflow" repo. 
- packages: write - with: - runners: '["ubuntu-22.04"]' - platform: "linux/amd64" - push-image: "false" - upload-image-artifact: "true" - upload-mount-cache-artifact: false - python-versions: "['3.9']" - branch: ${{ inputs.ref }} - use-uv: true - upgrade-to-newer-dependencies: false - constraints-branch: "constraints-main" - docker-cache: registry - disable-airflow-repo-cache: false + echo "Destination: '${DESTINATION}'" + echo "Skip write to stable folder: '${SKIP_WRITE_TO_STABLE_FOLDER}'" + if [[ "${DESTINATION}" == "auto" ]]; then + if [[ "${REF}" =~ ^.*[0-9]*\.[0-9]*\.[0-9]*$ ]]; then + echo "${REF} looks like final release, using live destination" + DESTINATION="live" + else + echo "${REF} does not looks like final release, using staging destination" + DESTINATION="staging" + fi + fi + echo "destination=${DESTINATION}" >> ${GITHUB_OUTPUT} + if [[ "${DESTINATION}" == "live" ]]; then + echo "destination-location=s3://live-docs-airflow-apache-org/docs/" >> ${GITHUB_OUTPUT} + else + echo "destination-location=s3://staging-docs-airflow-apache-org/docs/" >> ${GITHUB_OUTPUT} + fi build-docs: - needs: [build-ci-images, build-info] + needs: [build-info] timeout-minutes: 150 name: "Build documentation" runs-on: ubuntu-latest @@ -118,40 +117,60 @@ jobs: GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} - INCLUDE_NOT_READY_PROVIDERS: "true" INCLUDE_SUCCESS_OUTPUTS: false PYTHON_MAJOR_MINOR_VERSION: 3.9 VERBOSE: "true" + EXTRA_BUILD_OPTIONS: ${{ needs.build-info.outputs.extra-build-options }} steps: - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - - name: "Checkout ${{ github.ref }} " - uses: actions/checkout@v4 + # Check out the repo first to run cleanup - in sub-folder + - name: "Checkout current version first to clean-up stuff" + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - - name: "Checkout from ${{ inputs.ref }} to build docs" - run: | - git clone https://github.com/apache/airflow.git "${AIRFLOW_ROOT_PATH}" - cd "${AIRFLOW_ROOT_PATH}" && git checkout ${REF} - env: - REF: ${{ inputs.ref }} - - name: "Prepare breeze & CI image: 3.9" - uses: ./.github/actions/prepare_breeze_and_image + path: current-version + - name: "Prepare and cleanup runner" + run: ./scripts/ci/prepare_and_cleanup_runner.sh + working-directory: current-version + # We are checking repo for both - breeze and docs from the ref provided as input + # This will take longer as we need to rebuild CI image and it will not use cache + # but it will build the CI image from the version of Airflow that is used to check out things + - name: "Checkout ${{ inputs.ref }} " + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + persist-credentials: false + ref: ${{ inputs.ref }} + fetch-depth: 0 + fetch-tags: true + - name: "Install Breeze" + uses: ./.github/actions/breeze with: - platform: "linux/amd64" - python: 3.9 - use-uv: true + use-uv: ${{ inputs.use-uv }} - name: "Building docs with --docs-only flag" + env: + INCLUDE_DOCS: ${{ needs.build-info.outputs.include-docs }} + INCLUDE_COMMITS: "true" + run: > + breeze build-docs ${INCLUDE_DOCS} --docs-only + - name: Check disk space available + run: df -H + # Here we will create temp airflow-site dir to publish docs + - name: Create /mnt/airflow-site directory + run: | + sudo mkdir -p /mnt/airflow-site && sudo chown -R "${USER}" /mnt/airflow-site + echo 
"AIRFLOW_SITE_DIRECTORY=/mnt/airflow-site/" >> "$GITHUB_ENV" + - name: "Publish docs to /mnt/airflow-site directory" env: INCLUDE_DOCS: ${{ needs.build-info.outputs.include-docs }} run: > - breeze build-docs ${INCLUDE_DOCS} --docs-only --include-commits + breeze release-management publish-docs --override-versioned --run-in-parallel ${INCLUDE_DOCS} - name: "Upload build docs" - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: airflow-docs - path: ${{ env.AIRFLOW_ROOT_PATH }}/generated/_build + path: /mnt/airflow-site retention-days: '7' if-no-files-found: 'error' overwrite: 'true' @@ -167,7 +186,6 @@ jobs: GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} - INCLUDE_NOT_READY_PROVIDERS: "true" INCLUDE_SUCCESS_OUTPUTS: false PYTHON_MAJOR_MINOR_VERSION: 3.9 VERBOSE: "true" @@ -175,51 +193,39 @@ jobs: - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - - name: "Checkout ${{ github.ref }} " - uses: actions/checkout@v4 + # We are checking repo for both - breeze and docs from the "workflow' branch + # This will take longer as we need to rebuild CI image and it will not use cache + # but it will build the CI image from the version of Airflow that is used to check out things + - name: "Checkout ${{ inputs.ref }} " + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - - name: "Prepare breeze & CI image: 3.9" - uses: ./.github/actions/prepare_breeze_and_image + - name: "Prepare and cleanup runner" + run: ./scripts/ci/prepare_and_cleanup_runner.sh + - name: "Install Breeze" + uses: ./.github/actions/breeze with: - platform: "linux/amd64" - python: 3.9 - use-uv: true - - name: "Checkout ${{ inputs.ref }}" - run: | - git clone https://github.com/apache/airflow.git "${AIRFLOW_ROOT_PATH}" - cd "${AIRFLOW_ROOT_PATH}" && git checkout ${REF} - env: - REF: ${{ inputs.ref }} + use-uv: ${{ inputs.use-uv }} - name: "Download docs prepared as artifacts" - uses: actions/download-artifact@v4 + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 with: name: airflow-docs - path: ${{ env.AIRFLOW_ROOT_PATH }}/generated/_build + path: /mnt/airflow-site - name: Check disk space available run: df -H - # Here we will create temp airflow-site dir to publish - # docs and for back-references - - name: Create /mnt/airflow-site directory - run: | - sudo mkdir -p /mnt/airflow-site && sudo chown -R "${USER}" /mnt/airflow-site - echo "AIRFLOW_SITE_DIRECTORY=/mnt/airflow-site/" >> "$GITHUB_ENV" - - name: "Publish docs to tmp directory" + - name: "Update watermarks" env: - INCLUDE_DOCS: ${{ needs.build-info.outputs.include-docs }} - run: > - breeze release-management publish-docs --override-versioned --run-in-parallel - ${INCLUDE_DOCS} - - name: Check disk space available - run: df -H - - name: "Generate back references for providers" - run: breeze release-management add-back-references all-providers - - name: "Generate back references for apache-airflow" - run: breeze release-management add-back-references apache-airflow - - name: "Generate back references for docker-stack" - run: breeze release-management add-back-references docker-stack - - name: "Generate back references for helm-chart" - run: breeze release-management add-back-references helm-chart + SOURCE_DIR_PATH: "/mnt/airflow-site/docs-archive/" + # yamllint disable 
rule:line-length + run: | + curl -sSf -o add_watermark.py https://raw.githubusercontent.com/apache/airflow-site/refs/heads/main/.github/scripts/add_watermark.py \ + --header "Authorization: Bearer ${{ github.token }} " --header "X-GitHub-Api-Version: 2022-11-28" + chmod a+x add_watermark.py + mkdir -p images + curl -sSf -o images/staging.png https://raw.githubusercontent.com/apache/airflow-site/refs/heads/main/.github/scripts/images/staging.png + uv run add_watermark.py --pattern 'main.min*css' --folder ${SOURCE_DIR_PATH} \ + --image-directory images --url-prefix /images + if: needs.build-info.outputs.destination == 'staging' - name: Install AWS CLI v2 run: | curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o /tmp/awscliv2.zip @@ -235,10 +241,11 @@ jobs: aws-region: us-east-2 - name: "Syncing docs to S3" env: - DESTINATION_LOCATION: "${{ inputs.destination-location }}" + DESTINATION_LOCATION: "${{ needs.build-info.outputs.destination-location }}" SOURCE_DIR_PATH: "/mnt/airflow-site/docs-archive/" EXCLUDE_DOCS: "${{ inputs.exclude-docs }}" + SKIP_WRITE_TO_STABLE_FOLDER: "${{ needs.build-info.outputs.skip-write-to-stable-folder }}" run: | breeze release-management publish-docs-to-s3 --source-dir-path ${SOURCE_DIR_PATH} \ --destination-location ${DESTINATION_LOCATION} --stable-versions \ - --exclude-docs ${EXCLUDE_DOCS} --overwrite + --exclude-docs ${EXCLUDE_DOCS} --overwrite ${SKIP_WRITE_TO_STABLE_FOLDER} diff --git a/.github/workflows/push-image-cache.yml b/.github/workflows/push-image-cache.yml index f2258c13b77a9..b523577b46c99 100644 --- a/.github/workflows/push-image-cache.yml +++ b/.github/workflows/push-image-cache.yml @@ -113,7 +113,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare and cleanup runner" @@ -184,7 +184,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare and cleanup runner" @@ -196,7 +196,7 @@ jobs: - name: "Cleanup dist and context file" run: rm -fv ./dist/* ./docker-context-files/* - name: "Download packages prepared as artifacts" - uses: actions/download-artifact@v4 + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 with: name: prod-packages path: ./docker-context-files diff --git a/.github/workflows/recheck-old-bug-report.yml b/.github/workflows/recheck-old-bug-report.yml index 217092b86f87e..c245f73923216 100644 --- a/.github/workflows/recheck-old-bug-report.yml +++ b/.github/workflows/recheck-old-bug-report.yml @@ -28,7 +28,7 @@ jobs: recheck-old-bug-report: runs-on: ["ubuntu-22.04"] steps: - - uses: actions/stale@v9 + - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0 with: only-issue-labels: 'kind:bug' stale-issue-label: 'Stale Bug Report' diff --git a/.github/workflows/release_dockerhub_image.yml b/.github/workflows/release_dockerhub_image.yml index fe1ce300fbf4f..2499e521d74a3 100644 --- a/.github/workflows/release_dockerhub_image.yml +++ b/.github/workflows/release_dockerhub_image.yml @@ -83,7 +83,7 @@ jobs: docker run -v "${GITHUB_WORKSPACE}:/workspace" 
-u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare and cleanup runner" diff --git a/.github/workflows/release_single_dockerhub_image.yml b/.github/workflows/release_single_dockerhub_image.yml index 55a8c2cc00429..fd572adbabab1 100644 --- a/.github/workflows/release_single_dockerhub_image.yml +++ b/.github/workflows/release_single_dockerhub_image.yml @@ -76,7 +76,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare and cleanup runner" @@ -148,7 +148,7 @@ jobs: shell: bash run: find ./dist -name '*.json' - name: "Upload metadata artifact ${{ env.ARTIFACT_NAME }}" - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: ${{ env.ARTIFACT_NAME }} path: ./dist/metadata-* @@ -174,7 +174,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare and cleanup runner" @@ -197,7 +197,7 @@ jobs: ACTOR: ${{ github.actor }} run: echo "${GITHUB_TOKEN}" | docker login ghcr.io -u ${ACTOR} --password-stdin - name: "Download metadata artifacts" - uses: actions/download-artifact@v4 + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 with: path: ./dist pattern: metadata-${{ inputs.pythonVersion }}-* diff --git a/.github/workflows/run-unit-tests.yml b/.github/workflows/run-unit-tests.yml index 4b99ac6031137..035248113dac1 100644 --- a/.github/workflows/run-unit-tests.yml +++ b/.github/workflows/run-unit-tests.yml @@ -167,7 +167,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare breeze & CI image: ${{ matrix.python-version }}" diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 2e03e9f33b120..5724a17314aec 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -29,7 +29,7 @@ jobs: stale: runs-on: ["ubuntu-22.04"] steps: - - uses: actions/stale@v9 + - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0 with: stale-pr-message: > This pull request has been automatically marked as stale because it has not had diff --git a/.github/workflows/test-providers.yml b/.github/workflows/test-providers.yml index 411b18daaa7b4..525e8e8599a9d 100644 --- a/.github/workflows/test-providers.yml +++ b/.github/workflows/test-providers.yml @@ -89,7 +89,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare breeze & 
CI image: ${{ inputs.default-python-version }}" @@ -138,7 +138,8 @@ jobs: - name: "Install and verify wheel provider distributions" env: DISTRIBUTION_FORMAT: ${{ matrix.package-format }} - AIRFLOW_SKIP_CONSTRAINTS: "${{ inputs.upgrade-to-newer-dependencies }}" + # yamllint disable rule:line-length + INSTALL_AIRFLOW_WITH_CONSTRAINTS: "${{ inputs.upgrade-to-newer-dependencies == 'true' && 'false' || 'true' }}" run: > breeze release-management verify-provider-distributions --use-distributions-from-dist @@ -187,7 +188,7 @@ jobs: shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - name: "Prepare breeze & CI image: ${{ matrix.compat.python-version }}" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 01a585c19c456..3dc226a64dfdd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,8 @@ default_stages: [pre-commit, pre-push] default_language_version: python: python3 - node: 22.16.0 + node: 22.17.0 + golang: 1.24.0 minimum_pre_commit_version: '3.2.0' exclude: ^.*/.*_vendor/ repos: @@ -226,7 +227,8 @@ repos: entry: ./scripts/ci/pre_commit/check_deferrable_default.py pass_filenames: false # libcst doesn't have source wheels for all PY except PY3.12, excluding it - additional_dependencies: ['libcst>=1.1.0,!=1.8.0'] + # libcst 1.8.1 doesn't include typing-extensions which is needed for Python 3.9 + additional_dependencies: ['libcst>=1.1.0,!=1.8.0,!=1.8.1'] files: ^(providers/.*/)?airflow/.*/(sensors|operators)/.*\.py$ - repo: https://github.com/asottile/blacken-docs rev: 1.19.1 @@ -255,7 +257,13 @@ repos: exclude: ^providers/ssh/docs/connections/ssh\.rst$ - id: end-of-file-fixer name: Make sure that there is an empty line at the end - exclude: ^airflow-core/docs/img/.*\.dot|^airflow-core/docs/img/.*\.sha256|.*/dist/.*|LICENSES-ui\.txt$ + exclude: > + (?x) + ^airflow-core/docs/img/.*\.dot| + ^airflow-core/docs/img/.*\.sha256| + .*/dist/.*| + LICENSES-ui\.txt$| + .*/openapi-gen/.* - id: mixed-line-ending name: Detect if mixed line ending is used (\r vs. \r\n) - id: check-executables-have-shebangs @@ -267,7 +275,11 @@ repos: ^scripts/ci/docker-compose/gremlin/. - id: trailing-whitespace name: Remove trailing whitespace at end of line - exclude: ^airflow-core/docs/img/.*\.dot$|^dev/breeze/doc/images/output.*$ + exclude: > + (?x) + ^airflow-core/docs/img/.*\.dot$| + ^dev/breeze/doc/images/output.*$| + ^.*/openapi-gen/.*$ - id: fix-encoding-pragma name: Remove encoding header from Python files args: @@ -300,6 +312,7 @@ repos: ^.*airflow\.template\.yaml$| ^.*init_git_sync\.template\.yaml$| ^chart/(?:templates|files)/.*\.yaml$| + ^helm-tests/tests/chart_utils/keda.sh_scaledobjects\.yaml$| .*/v1.*\.yaml$| ^.*openapi.*\.yaml$| ^\.pre-commit-config\.yaml$| @@ -327,7 +340,17 @@ repos: The word(s) should be in lowercase." 
&& exec codespell "$@"' -- language: python types: [text] - exclude: material-icons\.css$|^images/.*$|^RELEASE_NOTES\.txt$|^.*package-lock\.json$|^.*/kinglear\.txt$|^.*pnpm-lock\.yaml$|.*/dist/.* + exclude: > + (?x) + material-icons\.css$| + ^images/.*$| + ^RELEASE_NOTES\.txt$| + ^.*package-lock\.json$| + ^.*/kinglear\.txt$| + ^.*pnpm-lock\.yaml$| + .*/dist/.*| + ^airflow-core/src/airflow/ui/src/i18n/locales/de/| + ^airflow-core/src/airflow/ui/src/i18n/locales/pl/ args: - --ignore-words=docs/spelling_wordlist.txt - --skip=providers/.*/src/airflow/providers/*/*.rst,providers/*/docs/changelog.rst,docs/*/commits.rst,providers/*/docs/commits.rst,providers/*/*/docs/commits.rst,docs/apache-airflow/tutorial/pipeline_example.csv,*.min.js,*.lock,INTHEWILD.md,*.svg @@ -389,7 +412,7 @@ repos: types_or: [python, pyi] args: [--fix] require_serial: true - additional_dependencies: ['ruff==0.11.2'] + additional_dependencies: ['ruff==0.11.13'] exclude: ^airflow-core/tests/unit/dags/test_imports\.py$|^performance/tests/test_.*\.py$ - id: ruff-format name: Run 'ruff format' @@ -399,14 +422,14 @@ repos: types_or: [python, pyi] args: [] require_serial: true - additional_dependencies: ['ruff==0.11.2'] + additional_dependencies: ['ruff==0.11.13'] exclude: ^airflow-core/tests/unit/dags/test_imports\.py$ - id: replace-bad-characters name: Replace bad characters entry: ./scripts/ci/pre_commit/replace_bad_characters.py language: python types: [file, text] - exclude: ^clients/gen/go\.sh$|^\.gitmodules$|.*/dist/.* + exclude: ^clients/gen/go\.sh$|^\.gitmodules$|.*/dist/.*|\.go$|/go\.(mod|sum)$ additional_dependencies: ['rich>=12.4.4'] - id: lint-dockerfile name: Lint Dockerfile @@ -441,7 +464,7 @@ repos: - id: check-airflow-providers-bug-report-template name: Sort airflow-bug-report provider list language: python - files: ^\.github/ISSUE_TEMPLATE/airflow_providers_bug_report\.yml$ + files: ^\.github/ISSUE_TEMPLATE/3-airflow_providers_bug_report\.yml$ require_serial: true entry: ./scripts/ci/pre_commit/check_airflow_bug_report_template.py additional_dependencies: ['rich>=12.4.4', 'pyyaml>=6.0.2'] @@ -605,7 +628,9 @@ repos: pass_filenames: true exclude: > (?x) + ^airflow-core/src/airflow/ui/src/i18n/config\.ts$| ^airflow-core/src/airflow/ui/openapi-gen/| + ^airflow-core/src/airflow/ui/src/i18n/locales/de/README\.md$| ^airflow-core/src/airflow/cli/commands/local_commands/fastapi_api_command\.py$| ^airflow-core/src/airflow/config_templates/| ^airflow-core/src/airflow/models/baseoperator\.py$| @@ -627,6 +652,7 @@ repos: ^providers/google/src/airflow/providers/google/cloud/operators/cloud_build\.py$| ^providers/google/src/airflow/providers/google/cloud/operators/dataproc\.py$| ^providers/google/src/airflow/providers/google/cloud/operators/mlengine\.py$| + ^providers/keycloak/src/airflow/providers/keycloak/auth_manager/cli/definition.py| ^providers/microsoft/azure/src/airflow/providers/microsoft/azure/hooks/cosmos\.py$| ^providers/microsoft/winrm/src/airflow/providers/microsoft/winrm/hooks/winrm\.py$| ^airflow-core/docs/.*commits\.rst$| @@ -640,6 +666,7 @@ repos: ^airflow-core/src/airflow/utils/trigger_rule\.py$| ^chart/values.schema\.json$| ^helm-tests/tests/chart_utils/helm_template_generator\.py$| + ^helm-tests/tests/chart_utils/ingress-networking-v1beta1\.json$| ^dev/| ^devel-common/src/docs/README\.rst$| ^docs/apache-airflow-providers-amazon/secrets-backends/aws-ssm-parameter-store\.rst$| @@ -855,7 +882,6 @@ repos: - id: compile-fab-assets name: Compile FAB provider assets language: node - 'types_or': [javascript, ts, 
tsx] files: ^providers/fab/.*/www/ entry: ./scripts/ci/pre_commit/compile_fab_assets.py pass_filenames: false @@ -1230,6 +1256,40 @@ repos: files: ^airflow-core/src/airflow/migrations/versions/.*\.py$ exclude: airflow-core/src/airflow/migrations/versions/0028_3_0_0_drop_ab_user_id_foreign_key.py + - id: ts-compile-lint-ui + name: Compile / format / lint UI + description: TS types generation / ESLint / Prettier new UI files + language: node + files: | + (?x) + ^airflow-core/src/airflow/ui/.*\.(js|ts|tsx|yaml|css|json)| + ^airflow-core/src/airflow/api_fastapi/core_api/openapi/.*\.yaml$| + ^airflow-core/src/airflow/api_fastapi/auth/managers/simple/openapi/v1.*\.yaml$ + exclude: | + (?x) + ^airflow-core/src/airflow/ui/node-modules/.*| + ^airflow-core/src/airflow/ui/.pnpm-store + entry: ./scripts/ci/pre_commit/ts_compile_lint_ui.py + additional_dependencies: ['pnpm@9.7.1'] + pass_filenames: true + require_serial: true + - id: ts-compile-lint-simple-auth-manager-ui + name: Compile / format / lint simple auth manager UI + description: TS types generation / ESLint / Prettier new UI files + language: node + files: | + (?x) + ^airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/.*\.(js|ts|tsx|yaml|css|json)| + ^airflow-core/src/airflow/api_fastapi/core_api/openapi/.*\.yaml$| + ^airflow-core/src/airflow/api_fastapi/auth/managers/simple/openapi/v1.*\.yaml$ + exclude: | + (?x) + ^airflow-core/src/airflow/api_fastapi/node-modules/.*| + ^airflow-core/src/airflow/api_fastapi/.pnpm-store + entry: ./scripts/ci/pre_commit/ts_compile_lint_simple_auth_manager_ui.py + additional_dependencies: ['pnpm@9.7.1'] + pass_filenames: true + require_serial: true ## ADD MOST PRE-COMMITS ABOVE THAT LINE # The below pre-commits are those requiring CI image to be built - id: mypy-dev @@ -1345,20 +1405,6 @@ repos: files: ^airflow-core/src/airflow/api_fastapi/.*\.py$|^airflow-core/src/airflow/api_fastapi/auth/managers/simple/.*\.py$|^providers/fab/src/airflow/providers/fab/auth_manager/api_fastapi/.*\.py$ exclude: ^airflow-core/src/airflow/api_fastapi/execution_api/.* additional_dependencies: ['rich>=12.4.4', 'openapi-spec-validator>=0.7.1'] - - id: ts-compile-format-lint-ui - name: Compile / format / lint UI - description: TS types generation / ESLint / Prettier new UI files - language: node - types_or: [javascript, ts, tsx, yaml, css, json] - files: | - (?x) - ^airflow-core/src/airflow/ui/| - ^airflow-core/src/airflow/api_fastapi/core_api/openapi/.*\.yaml$| - ^airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/| - ^airflow-core/src/airflow/api_fastapi/auth/managers/simple/openapi/v1.*\.yaml$ - entry: ./scripts/ci/pre_commit/compile_lint_ui.py - additional_dependencies: ['pnpm@9.7.1'] - pass_filenames: false - id: check-provider-yaml-valid name: Validate provider.yaml files entry: ./scripts/ci/pre_commit/check_provider_yaml_files.py diff --git a/Dockerfile b/Dockerfile index 0ca2986087de6..5b3a704c70cb8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -56,8 +56,8 @@ ARG PYTHON_BASE_IMAGE="python:3.9-slim-bookworm" # Also use `force pip` label on your PR to swap all places we use `uv` to `pip` ARG AIRFLOW_PIP_VERSION=25.1.1 # ARG AIRFLOW_PIP_VERSION="git+https://github.com/pypa/pip.git@main" -ARG AIRFLOW_SETUPTOOLS_VERSION=80.8.0 -ARG AIRFLOW_UV_VERSION=0.7.8 +ARG AIRFLOW_SETUPTOOLS_VERSION=80.9.0 +ARG AIRFLOW_UV_VERSION=0.7.17 ARG AIRFLOW_USE_UV="false" ARG UV_HTTP_TIMEOUT="300" ARG AIRFLOW_IMAGE_REPOSITORY="https://github.com/apache/airflow" @@ -137,7 +137,7 @@ function get_runtime_apt_deps() { echo if [[ 
"${RUNTIME_APT_DEPS=}" == "" ]]; then RUNTIME_APT_DEPS="apt-transport-https apt-utils ca-certificates \ -curl dumb-init freetds-bin krb5-user libev4 libgeos-dev \ +curl dumb-init freetds-bin git krb5-user libev4 libgeos-dev \ ldap-utils libsasl2-2 libsasl2-modules libxmlsec1 locales ${debian_version_apt_deps} \ lsb-release openssh-client python3-selinux rsync sasl2-bin sqlite3 sudo unixodbc" export RUNTIME_APT_DEPS @@ -232,6 +232,24 @@ readonly MARIADB_LTS_VERSION="10.11" : "${INSTALL_MYSQL_CLIENT:?Should be true or false}" : "${INSTALL_MYSQL_CLIENT_TYPE:-mariadb}" +retry() { + local retries=3 + local count=0 + # adding delay of 10 seconds + local delay=10 + until "$@"; do + exit_code=$? + count=$((count + 1)) + if [[ $count -lt $retries ]]; then + echo "Command failed. Attempt $count/$retries. Retrying in ${delay}s..." + sleep $delay + else + echo "Command failed after $retries attempts." + return $exit_code + fi + done +} + install_mysql_client() { if [[ "${1}" == "dev" ]]; then packages=("libmysqlclient-dev" "mysql-client") @@ -257,8 +275,8 @@ install_mysql_client() { echo "deb http://repo.mysql.com/apt/debian/ $(lsb_release -cs) mysql-${MYSQL_LTS_VERSION}" > \ /etc/apt/sources.list.d/mysql.list - apt-get update - apt-get install --no-install-recommends -y "${packages[@]}" + retry apt-get update + retry apt-get install --no-install-recommends -y "${packages[@]}" apt-get autoremove -yqq --purge apt-get clean && rm -rf /var/lib/apt/lists/* @@ -302,8 +320,8 @@ install_mariadb_client() { /etc/apt/sources.list.d/mariadb.list # Make sure that dependencies from MariaDB repo are preferred over Debian dependencies printf "Package: *\nPin: release o=MariaDB\nPin-Priority: 999\n" > /etc/apt/preferences.d/mariadb - apt-get update - apt-get install --no-install-recommends -y "${packages[@]}" + retry apt-get update + retry apt-get install --no-install-recommends -y "${packages[@]}" apt-get autoremove -yqq --purge apt-get clean && rm -rf /var/lib/apt/lists/* } @@ -655,7 +673,7 @@ if [[ $(id -u) == "0" ]]; then echo echo "${COLOR_RED}You are running pip as root. Please use 'airflow' user to run pip!${COLOR_RESET}" echo - echo "${COLOR_YELLOW}See: https://airflow.apache.org/docs/docker-stack/build.html#adding-a-new-pypi-package${COLOR_RESET}" + echo "${COLOR_YELLOW}See: https://airflow.apache.org/docs/docker-stack/build.html#adding-new-pypi-packages-individually${COLOR_RESET}" echo exit 1 fi @@ -1295,7 +1313,7 @@ function check_uid_gid() { >&2 echo " This is to make sure you can run the image with an arbitrary UID in the future." >&2 echo >&2 echo " See more about it in the Airflow's docker image documentation" - >&2 echo " http://airflow.apache.org/docs/docker-stack/entrypoint" + >&2 echo " https://airflow.apache.org/docs/docker-stack/entrypoint.html" >&2 echo # We still allow the image to run with `airflow` user. return @@ -1309,7 +1327,7 @@ function check_uid_gid() { >&2 echo " This is to make sure you can run the image with an arbitrary UID." 
>&2 echo >&2 echo " See more about it in the Airflow's docker image documentation" - >&2 echo " http://airflow.apache.org/docs/docker-stack/entrypoint" + >&2 echo " https://airflow.apache.org/docs/docker-stack/entrypoint.html" # This will not work so we fail hard exit 1 fi diff --git a/Dockerfile.ci b/Dockerfile.ci index 4c46efbf2dc2c..0d95a48e1800f 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -76,7 +76,7 @@ function get_runtime_apt_deps() { echo if [[ "${RUNTIME_APT_DEPS=}" == "" ]]; then RUNTIME_APT_DEPS="apt-transport-https apt-utils ca-certificates \ -curl dumb-init freetds-bin krb5-user libev4 libgeos-dev \ +curl dumb-init freetds-bin git krb5-user libev4 libgeos-dev \ ldap-utils libsasl2-2 libsasl2-modules libxmlsec1 locales ${debian_version_apt_deps} \ lsb-release openssh-client python3-selinux rsync sasl2-bin sqlite3 sudo unixodbc" export RUNTIME_APT_DEPS @@ -171,6 +171,24 @@ readonly MARIADB_LTS_VERSION="10.11" : "${INSTALL_MYSQL_CLIENT:?Should be true or false}" : "${INSTALL_MYSQL_CLIENT_TYPE:-mariadb}" +retry() { + local retries=3 + local count=0 + # adding delay of 10 seconds + local delay=10 + until "$@"; do + exit_code=$? + count=$((count + 1)) + if [[ $count -lt $retries ]]; then + echo "Command failed. Attempt $count/$retries. Retrying in ${delay}s..." + sleep $delay + else + echo "Command failed after $retries attempts." + return $exit_code + fi + done +} + install_mysql_client() { if [[ "${1}" == "dev" ]]; then packages=("libmysqlclient-dev" "mysql-client") @@ -196,8 +214,8 @@ install_mysql_client() { echo "deb http://repo.mysql.com/apt/debian/ $(lsb_release -cs) mysql-${MYSQL_LTS_VERSION}" > \ /etc/apt/sources.list.d/mysql.list - apt-get update - apt-get install --no-install-recommends -y "${packages[@]}" + retry apt-get update + retry apt-get install --no-install-recommends -y "${packages[@]}" apt-get autoremove -yqq --purge apt-get clean && rm -rf /var/lib/apt/lists/* @@ -241,8 +259,8 @@ install_mariadb_client() { /etc/apt/sources.list.d/mariadb.list # Make sure that dependencies from MariaDB repo are preferred over Debian dependencies printf "Package: *\nPin: release o=MariaDB\nPin-Priority: 999\n" > /etc/apt/preferences.d/mariadb - apt-get update - apt-get install --no-install-recommends -y "${packages[@]}" + retry apt-get update + retry apt-get install --no-install-recommends -y "${packages[@]}" apt-get autoremove -yqq --purge apt-get clean && rm -rf /var/lib/apt/lists/* } @@ -813,7 +831,6 @@ if [[ ${VERBOSE_COMMANDS:="false"} == "true" ]]; then set -x fi - . "${AIRFLOW_SOURCES:-/opt/airflow}"/scripts/in_container/_in_container_script_init.sh LD_PRELOAD="/usr/lib/$(uname -m)-linux-gnu/libstdc++.so.6" @@ -831,6 +848,10 @@ mkdir "${AIRFLOW_HOME}/sqlite" -p || true ASSET_COMPILATION_WAIT_MULTIPLIER=${ASSET_COMPILATION_WAIT_MULTIPLIER:=1} +if [[ "${CI=}" == "true" ]]; then + export COLUMNS="202" +fi + . 
"${IN_CONTAINER_DIR}/check_connectivity.sh" function wait_for_asset_compilation() { @@ -996,7 +1017,7 @@ function determine_airflow_to_use() { echo "${COLOR_BLUE}Uninstalling all packages first${COLOR_RESET}" echo # shellcheck disable=SC2086 - ${PACKAGING_TOOL_CMD} freeze | grep -ve "^-e" | grep -ve "^#" | grep -ve "^uv" | \ + ${PACKAGING_TOOL_CMD} freeze | grep -ve "^-e" | grep -ve "^#" | grep -ve "^uv" | grep -v "@" | \ xargs ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} # Now install rich ad click first to use the installation script # shellcheck disable=SC2086 @@ -1008,7 +1029,9 @@ function determine_airflow_to_use() { echo # Use uv run to install necessary dependencies automatically # in the future we will be able to use uv sync when `uv.lock` is supported - uv run /opt/airflow/scripts/in_container/install_development_dependencies.py \ + # for the use in parallel runs in docker containers--no-cache is needed - otherwise there is + # possibility of overriding temporary environments by multiple parallel processes + uv run --no-cache /opt/airflow/scripts/in_container/install_development_dependencies.py \ --constraint https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-${PYTHON_MAJOR_MINOR_VERSION}.txt # Some packages might leave legacy typing module which causes test issues # shellcheck disable=SC2086 @@ -1040,7 +1063,7 @@ function check_boto_upgrade() { # shellcheck disable=SC2086 ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} aiobotocore s3fs || true # shellcheck disable=SC2086 - ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} --upgrade boto3 botocore + ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} --upgrade "boto3<1.38.3" "botocore<1.38.3" set +x } @@ -1119,23 +1142,32 @@ function check_airflow_python_client_installation() { python "${IN_CONTAINER_DIR}/install_airflow_python_client.py" } +function initialize_db() { + # If we are going to start the api server OR we are a system test (which may or may not start the api server, + # depending on the Airflow version being used to run the tests), then migrate the DB. + if [[ ${START_API_SERVER_WITH_EXAMPLES=} == "true" || ${TEST_GROUP:=""} == "system" ]]; then + echo + echo "${COLOR_BLUE}Initializing database${COLOR_RESET}" + echo + airflow db migrate + echo + echo "${COLOR_BLUE}Database initialized${COLOR_RESET}" + fi +} + function start_api_server_with_examples(){ - # check if we should not start the api server with examples by checking if both - # START_API_SERVER_WITH_EXAMPLES is false AND the TEST_GROUP env var is not equal to "system" + USE_AIRFLOW_VERSION="${USE_AIRFLOW_VERSION:=""}" + # Do not start the api server if either START_API_SERVER_WITH_EXAMPLES is false or the TEST_GROUP env var is not + # equal to "system". 
if [[ ${START_API_SERVER_WITH_EXAMPLES=} != "true" && ${TEST_GROUP:=""} != "system" ]]; then return fi + # If the use Airflow version is set and it is <= 3.0.0 (which does not have the API server anyway) also return + if [[ ${USE_AIRFLOW_VERSION} != "" && ${USE_AIRFLOW_VERSION} < "3.0.0" ]]; then + return + fi export AIRFLOW__CORE__LOAD_EXAMPLES=True export AIRFLOW__API__EXPOSE_CONFIG=True - echo - echo "${COLOR_BLUE}Initializing database${COLOR_RESET}" - echo - airflow db migrate - echo - echo "${COLOR_BLUE}Database initialized${COLOR_RESET}" - echo - echo "${COLOR_BLUE}Parsing example dags${COLOR_RESET}" - echo airflow dags reserialize echo "Example dags parsing finished" if airflow config get-value core auth_manager | grep -q "FabAuthManager"; then @@ -1172,6 +1204,7 @@ check_downgrade_sqlalchemy check_downgrade_pendulum check_force_lowest_dependencies check_airflow_python_client_installation +initialize_db start_api_server_with_examples check_run_tests "${@}" @@ -1203,7 +1236,7 @@ ARG AIRFLOW_IMAGE_REPOSITORY="https://github.com/apache/airflow" # NOTE! When you want to make sure dependencies are installed from scratch in your PR after removing # some dependencies, you also need to set "disable image cache" in your PR to make sure the image is # not built using the "main" version of those dependencies. -ARG DEPENDENCIES_EPOCH_NUMBER="14" +ARG DEPENDENCIES_EPOCH_NUMBER="15" # Make sure noninteractive debian install is used and language variables set ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE} \ @@ -1359,8 +1392,8 @@ COPY --from=scripts common.sh install_packaging_tools.sh install_additional_depe # Also use `force pip` label on your PR to swap all places we use `uv` to `pip` ARG AIRFLOW_PIP_VERSION=25.1.1 # ARG AIRFLOW_PIP_VERSION="git+https://github.com/pypa/pip.git@main" -ARG AIRFLOW_SETUPTOOLS_VERSION=80.8.0 -ARG AIRFLOW_UV_VERSION=0.7.8 +ARG AIRFLOW_SETUPTOOLS_VERSION=80.9.0 +ARG AIRFLOW_UV_VERSION=0.7.17 # TODO(potiuk): automate with upgrade check (possibly) ARG AIRFLOW_PRE_COMMIT_VERSION="4.2.0" ARG AIRFLOW_PRE_COMMIT_UV_VERSION="4.1.4" diff --git a/README.md b/README.md index 4048392413c15..769525febc749 100644 --- a/README.md +++ b/README.md @@ -299,7 +299,7 @@ Apache Airflow version life cycle: | Version | Current Patch/Minor | State | First Release | Limited Maintenance | EOL/Terminated | |-----------|-----------------------|-----------|-----------------|-----------------------|------------------| -| 3 | 3.0.2 | Supported | Apr 22, 2025 | TBD | TBD | +| 3 | 3.0.3 | Supported | Apr 22, 2025 | TBD | TBD | | 2 | 2.11.0 | Supported | Dec 17, 2020 | Oct 22, 2025 | Apr 22, 2026 | | 1.10 | 1.10.15 | EOL | Aug 27, 2018 | Dec 17, 2020 | June 17, 2021 | | 1.9 | 1.9.0 | EOL | Jan 03, 2018 | Aug 27, 2018 | Aug 27, 2018 | diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index f850c05fdb2ce..4b57f3be4aed1 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -24,6 +24,78 @@ .. towncrier release notes start +Airflow 3.0.3 (2025-07-08) +-------------------------- + +Significant Changes +^^^^^^^^^^^^^^^^^^^ + +No significant changes. 
+ +Bug Fixes +""""""""" + +- Fix task execution failures with large data by improving internal communication protocol (#51924) +- Fix reschedule sensors failing after multiple re-queue attempts over long periods (#52706) +- Improve ``xcom_pull`` to cover different scenarios for mapped tasks (#51568) +- Fix connection retrieval failures in triggerer when schema field is used (#52691) +- Add back user impersonation (``run_as_user``) support for task execution (#51780) +- Fix DAG version not updating when bundle name changes without DAG structure changes (#51939) +- Add back ``exception`` to context for task callbacks (#52066) +- Fix task log retrieval for retry attempts showing incorrect logs (#51592) +- Fix data interval handling for DAGs created before AIP-39 during serialization (#51913) +- Fix lingering task supervisors when ``EOF`` is missed (#51180) (#51970) +- Persist ``EventsTimetable``'s description during serialization (#51926) +- Delete import error when a dag bundle becomes inactive (#51921) +- Cleanup import errors during DB migration (#51919) +- Fix ``EOF`` detection of subprocesses in Dag Processor (#51895) +- Stop streaming task logs if end of log mark is missing (#51482) +- Allow more empty loops before stopping log streaming (#52624) +- Fix Jinja2 Template deep copy error with ``dag.test`` (#51673) +- Explicitly close log file descriptor in the supervise function (#51654) +- Improve structured logging format and layout (#51567) (#51626) +- Use Connection Hook Names for Dropdown instead of connection IDs (#51613) +- Add back config setting to control exposing stacktrace (#51617) +- Fix task level alias resolution in structure endpoint (#51579) +- Fix backfill creation to include DAG run configuration from form (#51584) +- Fix structure edges in API responses (#51489) +- Make ``dag.test`` consistent with ``airflow dags test`` CLI command (#51476) +- Fix downstream asset attachment at task level in structure endpoint (#51425) +- Fix Task Instance ``No Status`` Filter (#52154) +- UI: Fix backfill creation to respect run backwards setting from form (#52168) +- UI: Set downstream option to default on task instance clear (#52246) +- UI: Enable iframe script execution (#52568) +- UI: Fix DAG tags filter not showing all tags in UI when tags are greater than 50 (#52714) +- UI: Add real-time clock updates to timezone selector (#52414) +- Improve Grid view performance and responsiveness with optimized data loading (#52718) +- Fix editing connection with sensitive extra field (#52445) +- Fix archival for cascading deletes by archiving dependent tables first (#51952) +- Fix whitespace handling in DAG owners parsing for multiple owners (#52221) +- Fix SQLite migration from 2.7.0 to 3.0.0 (#51431) +- Fix http exception when ti not found for extra links API (#51465) +- Fix Starting from Trigger when using ``MappedOperator`` (#52681) +- Add ti information to re-queue logs (#49995) +- Task SDK: Fix ``AssetEventOperations.get`` to use ``alias_name`` when specified (#52324) + +Miscellaneous +""""""""""""" + +- Update ``connections_test`` CLI to use Connection instead of BaseHook (#51834) (#51917) +- Fix table pagination when DAG filtering changes (#51795) +- UI: Move asset events to its own tab (#51655) +- Exclude ``libcst`` 1.8.1 for Python 3.9 (#51609) +- UI: Implement navigation on bar click (#50416) +- Reduce unnecessary logging when retrieving connections and variables (#51826) + +Doc Only Changes +"""""""""""""""" + +- Add note about payload size considerations in API docs (#51768) +- 
Enhance ENV vars and conns visibility docs (#52026) +- Add http-only warning when running behind proxy in documentation (#52699) +- Publish separate docs for Task SDK (#52682) +- Streamline Taskflow examples and link to core tutorial (#52709) + Airflow 3.0.2 (2025-06-10) -------------------------- @@ -71,7 +143,7 @@ Bug Fixes - Restored backward compatibility for the ``/run`` API endpoint for older Task SDK clients - Fixed dropdown overflow and error text styling in ``FlexibleForm`` component (#50845) - Corrected DAG tag rendering to display ``+1 more`` when tags exceed the display limit by one (#50669) -- Fix permission check on the ui config endpoint (#50608) +- Fix permission check on the ui config endpoint (#50564) - Fix ``default_args`` handling in operator ``.partial()`` to prevent ``TypeError`` when unused keys are present (#50525) - DAG Processor: Fix index to sort by last parsing duration (#50388) - UI: Fix border overlap issue in the Events page (#50453) diff --git a/airflow-core/docs/administration-and-deployment/dag-bundles.rst b/airflow-core/docs/administration-and-deployment/dag-bundles.rst index 7dc03ebab1451..057d354b1f523 100644 --- a/airflow-core/docs/administration-and-deployment/dag-bundles.rst +++ b/airflow-core/docs/administration-and-deployment/dag-bundles.rst @@ -83,6 +83,15 @@ For example, adding multiple dag bundles to your ``airflow.cfg`` file: You can also override the :ref:`config:dag_processor__refresh_interval` per dag bundle by passing it in kwargs. This controls how often the dag processor refreshes, or looks for new files, in the dag bundles. +Starting with Airflow 3.0.2, git is pre-installed in the base image. However, if you are using a version prior to 3.0.2, you need to install git in your Docker image. + +.. code-block:: Dockerfile + + RUN apt-get update && apt-get install -y git + ENV GIT_PYTHON_GIT_EXECUTABLE=/usr/bin/git + ENV GIT_PYTHON_REFRESH=quiet + + Writing custom dag bundles -------------------------- diff --git a/airflow-core/docs/best-practices.rst b/airflow-core/docs/best-practices.rst index 268f3e7150f63..28c3285339ac2 100644 --- a/airflow-core/docs/best-practices.rst +++ b/airflow-core/docs/best-practices.rst @@ -296,8 +296,6 @@ When you execute that code you will see: This means that the ``get_array`` is not executed as top-level code, but ``get_task_id`` is. -.. _best_practices/dynamic_dag_generation: Code Quality and Linting ------------------------ @@ -351,6 +349,7 @@ By integrating ``ruff`` into your development workflow, you can proactively addr For more information on ``ruff`` and its integration with Airflow, refer to the `official Airflow documentation `_. +..
_best_practices/dynamic_dag_generation: Dynamic DAG Generation ---------------------- diff --git a/airflow-core/docs/conf.py b/airflow-core/docs/conf.py index dfcafe1ed99a8..6ee9437509cc4 100644 --- a/airflow-core/docs/conf.py +++ b/airflow-core/docs/conf.py @@ -349,8 +349,8 @@ def add_airflow_core_exclude_patterns_to_sphinx(exclude_patterns: list[str]): graphviz_output_format = "svg" -main_openapi_path = Path(main_openapi_file).parent.joinpath("v1-rest-api-generated.yaml") -sam_openapi_path = Path(sam_openapi_file).parent.joinpath("v1-simple-auth-manager-generated.yaml") +main_openapi_path = Path(main_openapi_file).parent.joinpath("v2-rest-api-generated.yaml") +sam_openapi_path = Path(sam_openapi_file).parent.joinpath("v2-simple-auth-manager-generated.yaml") redoc = [ { "name": "Simple auth manager token API", diff --git a/airflow-core/docs/howto/connection.rst b/airflow-core/docs/howto/connection.rst index c753e1342660f..84aa1648b8224 100644 --- a/airflow-core/docs/howto/connection.rst +++ b/airflow-core/docs/howto/connection.rst @@ -115,9 +115,16 @@ If serializing with Airflow URI: See :ref:`Connection URI format ` for more details on how to generate the a valid URI. -.. note:: +Visibility in UI and CLI +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Connections defined through environment variables are **not displayed** in the Airflow UI or listed using ``airflow connections list``. + +This is because these connections are **resolved dynamically at runtime**, typically on the **worker** process executing your task. They are not stored in the metadata database or loaded in the webserver or scheduler environment. + +This supports secure deployment patterns where environment-based secrets (e.g. via ``.env`` files, Docker, or Kubernetes secrets) are injected only into runtime components like workers — and not into components exposed to users, like the webserver. - Connections defined in environment variables will not show up in the Airflow UI or using ``airflow connections list``. +If you need connections to appear in the UI for visibility or editing, define them using the metadata database instead. Storing connections in a Secrets Backend diff --git a/airflow-core/docs/howto/docker-compose/index.rst b/airflow-core/docs/howto/docker-compose/index.rst index df46066a0bc0a..0d5e2a22bb62a 100644 --- a/airflow-core/docs/howto/docker-compose/index.rst +++ b/airflow-core/docs/howto/docker-compose/index.rst @@ -307,11 +307,13 @@ Examples of how you can extend the image with custom providers, python packages, apt packages and more can be found in :doc:`Building the image `. .. note:: - Creating custom images means that you need to maintain also a level of automation as you need to re-create the images - when either the packages you want to install or Airflow is upgraded. Please do not forget about keeping these scripts. - Also keep in mind, that in cases when you run pure Python tasks, you can use the - `Python Virtualenv functions <_howto/operator:PythonVirtualenvOperator>`_ which will - dynamically source and install python dependencies during runtime. With Airflow 2.8.0 Virtualenvs can also be cached. + Creating custom images means that you need to maintain also a level of + automation as you need to re-create the images when either the packages you + want to install or Airflow is upgraded. Please do not forget about keeping + these scripts. 
Also keep in mind, that in cases when you run pure Python + tasks, you can use :ref:`Python Virtualenv functions `, + which will dynamically source and install python dependencies during runtime. + With Airflow 2.8.0, virtualenvs can also be cached. Special case - adding dependencies via requirements.txt file ============================================================ diff --git a/airflow-core/docs/howto/dynamic-dag-generation.rst b/airflow-core/docs/howto/dynamic-dag-generation.rst index 814b620ea719b..734e89f5d805d 100644 --- a/airflow-core/docs/howto/dynamic-dag-generation.rst +++ b/airflow-core/docs/howto/dynamic-dag-generation.rst @@ -40,7 +40,8 @@ If you want to use variables to configure your code, you should always use `environment variables `_ in your top-level code rather than :doc:`Airflow Variables `. Using Airflow Variables in top-level code creates a connection to the metadata DB of Airflow to fetch the value, which can slow -down parsing and place extra load on the DB. See the `best practices on Airflow Variables `_ +down parsing and place extra load on the DB. See +:ref:`best practices on Airflow Variables ` to make the best use of Airflow Variables in your dags using Jinja templates. For example you could set ``DEPLOYMENT`` variable differently for your production and development diff --git a/airflow-core/docs/howto/run-behind-proxy.rst b/airflow-core/docs/howto/run-behind-proxy.rst index c9eb3295bd29c..483f3e796904c 100644 --- a/airflow-core/docs/howto/run-behind-proxy.rst +++ b/airflow-core/docs/howto/run-behind-proxy.rst @@ -63,6 +63,9 @@ To do so, you need to set the following setting in your ``airflow.cfg``:: `Uvicorn's docs `_. For the full options you can pass here. (Please note the ``--forwarded-allow-ips`` CLI option does not exist in Airflow.) +- Please make sure your proxy does not enforce http-only status on the Set-Cookie headers. + Airflow frontend needs to access the cookies through javascript, and a http-only flag would disturb this functionality. + .. spelling:: Uvicorn diff --git a/airflow-core/docs/howto/variable.rst b/airflow-core/docs/howto/variable.rst index 20c36597c94e6..a07a04a154571 100644 --- a/airflow-core/docs/howto/variable.rst +++ b/airflow-core/docs/howto/variable.rst @@ -61,10 +61,20 @@ You can use them in your dags as: Single underscores surround ``VAR``. This is in contrast with the way ``airflow.cfg`` parameters are stored, where double underscores surround the config section name. - Variables set using Environment Variables would not appear in the Airflow UI but you will - be able to use them in your DAG file. Variables set using Environment Variables will also + Variables set using Environment Variables will also take precedence over variables defined in the Airflow UI. +Visibility in UI and CLI +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Variables defined through environment variables are **not displayed** in the Airflow UI or listed using ``airflow variables list``. + +This is because these variables are **resolved dynamically at runtime**, typically on the **worker** process executing your task. They are not stored in the metadata database or loaded in the webserver or scheduler environment. + +This supports secure deployment patterns where environment-based secrets (e.g. via ``.env`` files, Docker, or Kubernetes secrets) are injected only into runtime components like workers — and not into components exposed to users, like the webserver. 
+ +If you want variables to appear in the UI for visibility or editing, define them in the metadata database instead. + Securing Variables ------------------ diff --git a/airflow-core/docs/img/airflow_erd.sha256 b/airflow-core/docs/img/airflow_erd.sha256 index da3765115e31d..2d2c19551b3dd 100644 --- a/airflow-core/docs/img/airflow_erd.sha256 +++ b/airflow-core/docs/img/airflow_erd.sha256 @@ -1 +1 @@ -066cb891884eea1ee0496b5c507d4a52c20d0440387f9ec8bacb1d616a26e40e \ No newline at end of file +968bf5c974c4a9663b6be095837d255a2cc8e25ce80209904e672b36edd14148 \ No newline at end of file diff --git a/airflow-core/docs/img/airflow_erd.svg b/airflow-core/docs/img/airflow_erd.svg index 879c9f17f903e..8c66ac9da76dc 100644 --- a/airflow-core/docs/img/airflow_erd.svg +++ b/airflow-core/docs/img/airflow_erd.svg @@ -880,470 +880,470 @@ 0..N {0,1} - - -rendered_task_instance_fields - -rendered_task_instance_fields - -dag_id - - [VARCHAR(250)] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -k8s_pod_yaml - - [JSON] - -rendered_fields - - [JSON] - NOT NULL - - - -task_instance--rendered_task_instance_fields - -0..N -1 - - - -task_instance--rendered_task_instance_fields - -0..N -1 - - - -task_instance--rendered_task_instance_fields - -0..N -1 - - - -task_instance--rendered_task_instance_fields - -0..N -1 - - + task_map - -task_map - -dag_id - - [VARCHAR(250)] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -keys - - [JSONB] - -length - - [INTEGER] - NOT NULL + +task_map + +dag_id + + [VARCHAR(250)] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +keys + + [JSONB] + +length + + [INTEGER] + NOT NULL - + task_instance--task_map - -0..N -1 + +0..N +1 - + task_instance--task_map - -0..N -1 + +0..N +1 - + task_instance--task_map - -0..N -1 + +0..N +1 - + task_instance--task_map - -0..N -1 + +0..N +1 - + task_reschedule - -task_reschedule - -id - - [INTEGER] - NOT NULL - -duration - - [INTEGER] - NOT NULL - -end_date - - [TIMESTAMP] - NOT NULL - -reschedule_date - - [TIMESTAMP] - NOT NULL - -start_date - - [TIMESTAMP] - NOT NULL - -ti_id - - [UUID] - NOT NULL + +task_reschedule + +id + + [INTEGER] + NOT NULL + +duration + + [INTEGER] + NOT NULL + +end_date + + [TIMESTAMP] + NOT NULL + +reschedule_date + + [TIMESTAMP] + NOT NULL + +start_date + + [TIMESTAMP] + NOT NULL + +ti_id + + [UUID] + NOT NULL - + task_instance--task_reschedule - -0..N -1 + +0..N +1 - + xcom - -xcom - -dag_run_id - - [INTEGER] - NOT NULL - -key - - [VARCHAR(512)] - NOT NULL - -map_index - - [INTEGER] - NOT NULL - -task_id - - [VARCHAR(250)] - NOT NULL - -dag_id - - [VARCHAR(250)] - NOT NULL - -run_id - - [VARCHAR(250)] - NOT NULL - -timestamp - - [TIMESTAMP] - NOT NULL - -value - - [JSONB] + +xcom + +dag_run_id + + [INTEGER] + NOT NULL + +key + + [VARCHAR(512)] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +dag_id + + [VARCHAR(250)] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +timestamp + + [TIMESTAMP] + NOT NULL + +value + + [JSONB] - + task_instance--xcom - -0..N -1 + +0..N +1 - + task_instance--xcom - -0..N -1 + +0..N +1 - + task_instance--xcom - -0..N -1 + +0..N +1 - + task_instance--xcom - -0..N -1 + +0..N +1 - + task_instance_note - -task_instance_note - -ti_id - - [UUID] - NOT NULL - -content - - [VARCHAR(1000)] - -created_at - - 
[TIMESTAMP] - NOT NULL - -updated_at - - [TIMESTAMP] - NOT NULL - -user_id - - [VARCHAR(128)] + +task_instance_note + +ti_id + + [UUID] + NOT NULL + +content + + [VARCHAR(1000)] + +created_at + + [TIMESTAMP] + NOT NULL + +updated_at + + [TIMESTAMP] + NOT NULL + +user_id + + [VARCHAR(128)] - + task_instance--task_instance_note - -1 -1 + +1 +1 - + task_instance_history - -task_instance_history - -task_instance_id - - [UUID] - NOT NULL - -context_carrier - - [JSONB] - -custom_operator_name - - [VARCHAR(1000)] - -dag_id - - [VARCHAR(250)] - NOT NULL - -dag_version_id - - [UUID] - -duration - - [DOUBLE_PRECISION] - -end_date - - [TIMESTAMP] - -executor - - [VARCHAR(1000)] - -executor_config - - [BYTEA] - -external_executor_id - - [VARCHAR(250)] - -hostname - - [VARCHAR(1000)] - -map_index - - [INTEGER] - NOT NULL - -max_tries - - [INTEGER] - -next_kwargs - - [JSONB] - -next_method - - [VARCHAR(1000)] - -operator - - [VARCHAR(1000)] - -pid - - [INTEGER] - -pool - - [VARCHAR(256)] - NOT NULL - -pool_slots - - [INTEGER] - NOT NULL - -priority_weight - - [INTEGER] - -queue - - [VARCHAR(256)] - -queued_by_job_id - - [INTEGER] - -queued_dttm - - [TIMESTAMP] - -rendered_map_index - - [VARCHAR(250)] - -run_id - - [VARCHAR(250)] - NOT NULL - -scheduled_dttm - - [TIMESTAMP] - -span_status - - [VARCHAR(250)] - NOT NULL - -start_date - - [TIMESTAMP] - -state - - [VARCHAR(20)] - -task_display_name - - [VARCHAR(2000)] - -task_id - - [VARCHAR(250)] - NOT NULL - -trigger_id - - [INTEGER] - -trigger_timeout - - [TIMESTAMP] - -try_number - - [INTEGER] - NOT NULL - -unixname - - [VARCHAR(1000)] - -updated_at - - [TIMESTAMP] + +task_instance_history + +task_instance_id + + [UUID] + NOT NULL + +context_carrier + + [JSONB] + +custom_operator_name + + [VARCHAR(1000)] + +dag_id + + [VARCHAR(250)] + NOT NULL + +dag_version_id + + [UUID] + +duration + + [DOUBLE_PRECISION] + +end_date + + [TIMESTAMP] + +executor + + [VARCHAR(1000)] + +executor_config + + [BYTEA] + +external_executor_id + + [VARCHAR(250)] + +hostname + + [VARCHAR(1000)] + +map_index + + [INTEGER] + NOT NULL + +max_tries + + [INTEGER] + +next_kwargs + + [JSONB] + +next_method + + [VARCHAR(1000)] + +operator + + [VARCHAR(1000)] + +pid + + [INTEGER] + +pool + + [VARCHAR(256)] + NOT NULL + +pool_slots + + [INTEGER] + NOT NULL + +priority_weight + + [INTEGER] + +queue + + [VARCHAR(256)] + +queued_by_job_id + + [INTEGER] + +queued_dttm + + [TIMESTAMP] + +rendered_map_index + + [VARCHAR(250)] + +run_id + + [VARCHAR(250)] + NOT NULL + +scheduled_dttm + + [TIMESTAMP] + +span_status + + [VARCHAR(250)] + NOT NULL + +start_date + + [TIMESTAMP] + +state + + [VARCHAR(20)] + +task_display_name + + [VARCHAR(2000)] + +task_id + + [VARCHAR(250)] + NOT NULL + +trigger_id + + [INTEGER] + +trigger_timeout + + [TIMESTAMP] + +try_number + + [INTEGER] + NOT NULL + +unixname + + [VARCHAR(1000)] + +updated_at + + [TIMESTAMP] - + task_instance--task_instance_history - -0..N -1 + +0..N +1 - + task_instance--task_instance_history - -0..N -1 + +0..N +1 - + task_instance--task_instance_history - -0..N -1 + +0..N +1 - + task_instance--task_instance_history - -0..N -1 + +0..N +1 + + + +rendered_task_instance_fields + +rendered_task_instance_fields + +dag_id + + [VARCHAR(250)] + NOT NULL + +map_index + + [INTEGER] + NOT NULL + +run_id + + [VARCHAR(250)] + NOT NULL + +task_id + + [VARCHAR(250)] + NOT NULL + +k8s_pod_yaml + + [JSON] + +rendered_fields + + [JSON] + NOT NULL + + + +task_instance--rendered_task_instance_fields + +0..N +1 + + + +task_instance--rendered_task_instance_fields + 
+0..N +1 + + + +task_instance--rendered_task_instance_fields + +0..N +1 + + + +task_instance--rendered_task_instance_fields + +0..N +1 diff --git a/airflow-core/docs/index.rst b/airflow-core/docs/index.rst index a7a21e671c981..1927774703faa 100644 --- a/airflow-core/docs/index.rst +++ b/airflow-core/docs/index.rst @@ -32,6 +32,15 @@ Airflow workflows are defined entirely in Python. This "workflows as code" appro - **Extensible**: The Airflow framework includes a wide range of built-in operators and can be extended to fit your needs. - **Flexible**: Airflow leverages the `Jinja `_ templating engine, allowing rich customizations. +.. _task-sdk-docs: + +Task SDK +======== + +For Airflow Task SDK, see the standalone reference & tutorial site: + + https://airflow.apache.org/docs/task-sdk/stable/ + Dags ----------------------------------------- diff --git a/airflow-core/docs/installation/supported-versions.rst b/airflow-core/docs/installation/supported-versions.rst index b7eb95de6b564..6169a495b1225 100644 --- a/airflow-core/docs/installation/supported-versions.rst +++ b/airflow-core/docs/installation/supported-versions.rst @@ -29,7 +29,7 @@ Apache Airflow® version life cycle: ========= ===================== ========= =============== ===================== ================ Version Current Patch/Minor State First Release Limited Maintenance EOL/Terminated ========= ===================== ========= =============== ===================== ================ -3 3.0.2 Supported Apr 22, 2025 TBD TBD +3 3.0.3 Supported Apr 22, 2025 TBD TBD 2 2.11.0 Supported Dec 17, 2020 Oct 22, 2025 Apr 22, 2026 1.10 1.10.15 EOL Aug 27, 2018 Dec 17, 2020 June 17, 2021 1.9 1.9.0 EOL Jan 03, 2018 Aug 27, 2018 Aug 27, 2018 diff --git a/airflow-core/docs/installation/upgrading_to_airflow3.rst b/airflow-core/docs/installation/upgrading_to_airflow3.rst index f192e22f313ad..00d4246d55abf 100644 --- a/airflow-core/docs/installation/upgrading_to_airflow3.rst +++ b/airflow-core/docs/installation/upgrading_to_airflow3.rst @@ -71,7 +71,7 @@ Some changes can be automatically fixed. To do so, run the following command: ruff check dag/ --select AIR301 --fix --preview -You can also configure these flags through configuration files. See `Configuring Ruff `_ for details. +You can also configure these flags through configuration files. See `Configuring Ruff `_ for details. Step 4: Install the Standard Providers -------------------------------------- diff --git a/airflow-core/docs/migrations-ref.rst b/airflow-core/docs/migrations-ref.rst index 2f35cf2deecf4..af3ec9a4a90b2 100644 --- a/airflow-core/docs/migrations-ref.rst +++ b/airflow-core/docs/migrations-ref.rst @@ -39,7 +39,9 @@ Here's the list of all the Database Migrations that are executed via when you ru +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | Revision ID | Revises ID | Airflow Version | Description | +=========================+==================+===================+==============================================================+ -| ``29ce7909c52b`` (head) | ``959e216a3abb`` | ``3.0.0`` | Change TI table to have unique UUID id/pk per attempt. | +| ``fe199e1abd77`` (head) | ``29ce7909c52b`` | ``3.0.3`` | Delete import errors. | ++-------------------------+------------------+-------------------+--------------------------------------------------------------+ +| ``29ce7909c52b`` | ``959e216a3abb`` | ``3.0.0`` | Change TI table to have unique UUID id/pk per attempt. 
| +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | ``959e216a3abb`` | ``0e9519b56710`` | ``3.0.0`` | Rename ``is_active`` to ``is_stale`` column in ``dag`` | | | | | table. | diff --git a/airflow-core/docs/public-airflow-interface.rst b/airflow-core/docs/public-airflow-interface.rst index 4b87bd200b09b..aa5e3b5dc1bee 100644 --- a/airflow-core/docs/public-airflow-interface.rst +++ b/airflow-core/docs/public-airflow-interface.rst @@ -46,9 +46,9 @@ MAJOR version of Airflow. On the other hand, classes and methods starting with ` as protected Python methods) and ``__`` (also known as private Python methods) are not part of the Public Airflow Interface and might change at any time. -You can also use Airflow's Public Interface via the `Stable REST API `_ (based on the +You can also use Airflow's Public Interface via the :doc:`Stable REST API ` (based on the OpenAPI specification). For specific needs you can also use the -`Airflow Command Line Interface (CLI) `_ though its behaviour might change +:doc:`Airflow Command Line Interface (CLI) ` though its behaviour might change in details (such as output format and available flags) so if you want to rely on those in programmatic way, the Stable REST API is recommended. @@ -408,11 +408,12 @@ Everything not mentioned in this document should be considered as non-Public Int Sometimes in other applications those components could be relied on to keep backwards compatibility, but in Airflow they are not parts of the Public Interface and might change any time: -* `Database structure `_ is considered to be an internal implementation +* :doc:`Database structure ` is considered to be an internal implementation detail and you should not assume the structure is going to be maintained in a backwards-compatible way. -* `Web UI `_ is continuously evolving and there are no backwards compatibility guarantees on HTML elements. +* :doc:`Web UI ` is continuously evolving and there are no backwards + compatibility guarantees on HTML elements. * Python classes except those explicitly mentioned in this document, are considered an internal implementation detail and you should not assume they will be maintained diff --git a/airflow-core/docs/security/api.rst b/airflow-core/docs/security/api.rst index 115cfec443aef..3f86a75b863fb 100644 --- a/airflow-core/docs/security/api.rst +++ b/airflow-core/docs/security/api.rst @@ -93,3 +93,10 @@ Page size limit To protect against requests that may lead to application instability, the stable API has a limit of items in response. The default is 100 items, but you can change it using ``maximum_page_limit`` option in ``[api]`` section in the ``airflow.cfg`` file. + +Request Payload Considerations +------------------------------ + +When using REST APIs that accept data payloads (such as the Variables API), be mindful of the payload size. +Large payloads (out of ordinary size, like a million bytes) can impact the performance of the Airflow webserver. +It's recommended to implement appropriate size limits at the proxy layer for your deployment. diff --git a/airflow-core/docs/start.rst b/airflow-core/docs/start.rst index 8c3011398ea0c..f62669583e0ad 100644 --- a/airflow-core/docs/start.rst +++ b/airflow-core/docs/start.rst @@ -66,7 +66,7 @@ This quick start guide will help you bootstrap an Airflow standalone instance on :substitutions: - AIRFLOW_VERSION=3.0.2 + AIRFLOW_VERSION=3.0.3 # Extract the version of Python you have installed. 
If you're currently using a Python version that is not supported by Airflow, you may want to set this manually. # See above for supported versions. diff --git a/airflow-core/docs/troubleshooting.rst b/airflow-core/docs/troubleshooting.rst index f636b87a42c47..f354ea1a2ff6e 100644 --- a/airflow-core/docs/troubleshooting.rst +++ b/airflow-core/docs/troubleshooting.rst @@ -46,3 +46,13 @@ Here are some examples that could cause such an event: - A DAG run timeout, specified by ``dagrun_timeout`` in the DAG's definition. - An Airflow worker running out of memory - Usually, Airflow workers that run out of memory receive a SIGKILL, and the scheduler will fail the corresponding task instance for not having a heartbeat. However, in some scenarios, Airflow kills the task before that happens. + +Lingering task supervisor processes +----------------------------------- + +Under very high concurrency the socket handlers inside the task supervisor may +miss the final EOF events from the task process. When this occurs the supervisor +believes sockets are still open and will not exit. The +:ref:`workers.socket_cleanup_timeout ` option controls how long the supervisor +waits after the task finishes before force-closing any remaining sockets. If you +observe leftover ``supervisor`` processes, consider increasing this delay. diff --git a/airflow-core/pyproject.toml b/airflow-core/pyproject.toml index e29f1589cdefa..ab8aed87000fd 100644 --- a/airflow-core/pyproject.toml +++ b/airflow-core/pyproject.toml @@ -59,7 +59,7 @@ classifiers = [ ] # Version is defined in src/airflow/__init__.py and it is automatically synchronized by pre-commit -version = "3.0.2" +version = "3.0.3" dependencies = [ "a2wsgi>=1.10.8", @@ -95,7 +95,8 @@ dependencies = [ "jinja2>=3.1.5", "jsonschema>=4.19.1", "lazy-object-proxy>=1.2.0", - "libcst >=1.1.0", + 'libcst >=1.1.0,!=1.8.1;python_version<"3.10"', + 'libcst >=1.1.0;python_version>="3.10"', "linkify-it-py>=2.0.0", "lockfile>=0.12.2", "methodtools>=0.4.7", @@ -138,7 +139,7 @@ dependencies = [ # Does not work with it Tracked in https://github.com/fsspec/universal_pathlib/issues/276 "universal-pathlib>=0.2.2,!=0.2.4", "uuid6>=2024.7.10", - "apache-airflow-task-sdk<1.1.0,>=1.0.2", + "apache-airflow-task-sdk<1.1.0,>=1.0.3", # pre-installed providers "apache-airflow-providers-common-compat>=1.6.0", "apache-airflow-providers-common-io>=1.5.3", diff --git a/airflow-core/src/airflow/__init__.py b/airflow-core/src/airflow/__init__.py index c9c632791e080..cf804cb8ad5c2 100644 --- a/airflow-core/src/airflow/__init__.py +++ b/airflow-core/src/airflow/__init__.py @@ -25,7 +25,7 @@ # lib.) This is required by some IDEs to resolve the import paths. 
__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore -__version__ = "3.0.2" +__version__ = "3.0.3" import os diff --git a/airflow-core/src/airflow/api_fastapi/auth/managers/simple/openapi/v1-simple-auth-manager-generated.yaml b/airflow-core/src/airflow/api_fastapi/auth/managers/simple/openapi/v2-simple-auth-manager-generated.yaml similarity index 100% rename from airflow-core/src/airflow/api_fastapi/auth/managers/simple/openapi/v1-simple-auth-manager-generated.yaml rename to airflow-core/src/airflow/api_fastapi/auth/managers/simple/openapi/v2-simple-auth-manager-generated.yaml diff --git a/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/common.ts b/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/common.ts index 497c0a3e40cf5..868eed70220fc 100644 --- a/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/common.ts +++ b/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/common.ts @@ -1,37 +1,14 @@ -// generated with @7nohe/openapi-react-query-codegen@1.6.2 -import { UseQueryResult } from "@tanstack/react-query"; +// generated with @7nohe/openapi-react-query-codegen@1.6.2 +import { UseQueryResult } from "@tanstack/react-query"; import { SimpleAuthManagerLoginService } from "../requests/services.gen"; - -export type SimpleAuthManagerLoginServiceCreateTokenAllAdminsDefaultResponse = Awaited< - ReturnType ->; -export type SimpleAuthManagerLoginServiceCreateTokenAllAdminsQueryResult< - TData = SimpleAuthManagerLoginServiceCreateTokenAllAdminsDefaultResponse, - TError = unknown, -> = UseQueryResult; -export const useSimpleAuthManagerLoginServiceCreateTokenAllAdminsKey = - "SimpleAuthManagerLoginServiceCreateTokenAllAdmins"; -export const UseSimpleAuthManagerLoginServiceCreateTokenAllAdminsKeyFn = (queryKey?: Array) => [ - useSimpleAuthManagerLoginServiceCreateTokenAllAdminsKey, - ...(queryKey ?? []), -]; -export type SimpleAuthManagerLoginServiceLoginAllAdminsDefaultResponse = Awaited< - ReturnType ->; -export type SimpleAuthManagerLoginServiceLoginAllAdminsQueryResult< - TData = SimpleAuthManagerLoginServiceLoginAllAdminsDefaultResponse, - TError = unknown, -> = UseQueryResult; -export const useSimpleAuthManagerLoginServiceLoginAllAdminsKey = - "SimpleAuthManagerLoginServiceLoginAllAdmins"; -export const UseSimpleAuthManagerLoginServiceLoginAllAdminsKeyFn = (queryKey?: Array) => [ - useSimpleAuthManagerLoginServiceLoginAllAdminsKey, - ...(queryKey ?? []), -]; -export type SimpleAuthManagerLoginServiceCreateTokenMutationResult = Awaited< - ReturnType ->; -export type SimpleAuthManagerLoginServiceCreateTokenCliMutationResult = Awaited< - ReturnType ->; +export type SimpleAuthManagerLoginServiceCreateTokenAllAdminsDefaultResponse = Awaited>; +export type SimpleAuthManagerLoginServiceCreateTokenAllAdminsQueryResult = UseQueryResult; +export const useSimpleAuthManagerLoginServiceCreateTokenAllAdminsKey = "SimpleAuthManagerLoginServiceCreateTokenAllAdmins"; +export const UseSimpleAuthManagerLoginServiceCreateTokenAllAdminsKeyFn = (queryKey?: Array) => [useSimpleAuthManagerLoginServiceCreateTokenAllAdminsKey, ...(queryKey ?? 
[])]; +export type SimpleAuthManagerLoginServiceLoginAllAdminsDefaultResponse = Awaited>; +export type SimpleAuthManagerLoginServiceLoginAllAdminsQueryResult = UseQueryResult; +export const useSimpleAuthManagerLoginServiceLoginAllAdminsKey = "SimpleAuthManagerLoginServiceLoginAllAdmins"; +export const UseSimpleAuthManagerLoginServiceLoginAllAdminsKeyFn = (queryKey?: Array) => [useSimpleAuthManagerLoginServiceLoginAllAdminsKey, ...(queryKey ?? [])]; +export type SimpleAuthManagerLoginServiceCreateTokenMutationResult = Awaited>; +export type SimpleAuthManagerLoginServiceCreateTokenCliMutationResult = Awaited>; diff --git a/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/ensureQueryData.ts b/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/ensureQueryData.ts index 2d73f46985c5b..c1213d8527779 100644 --- a/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/ensureQueryData.ts +++ b/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/ensureQueryData.ts @@ -1,16 +1,7 @@ -// generated with @7nohe/openapi-react-query-codegen@1.6.2 -import { type QueryClient } from "@tanstack/react-query"; +// generated with @7nohe/openapi-react-query-codegen@1.6.2 +import { type QueryClient } from "@tanstack/react-query"; import { SimpleAuthManagerLoginService } from "../requests/services.gen"; import * as Common from "./common"; - -export const ensureUseSimpleAuthManagerLoginServiceCreateTokenAllAdminsData = (queryClient: QueryClient) => - queryClient.ensureQueryData({ - queryKey: Common.UseSimpleAuthManagerLoginServiceCreateTokenAllAdminsKeyFn(), - queryFn: () => SimpleAuthManagerLoginService.createTokenAllAdmins(), - }); -export const ensureUseSimpleAuthManagerLoginServiceLoginAllAdminsData = (queryClient: QueryClient) => - queryClient.ensureQueryData({ - queryKey: Common.UseSimpleAuthManagerLoginServiceLoginAllAdminsKeyFn(), - queryFn: () => SimpleAuthManagerLoginService.loginAllAdmins(), - }); +export const ensureUseSimpleAuthManagerLoginServiceCreateTokenAllAdminsData = (queryClient: QueryClient) => queryClient.ensureQueryData({ queryKey: Common.UseSimpleAuthManagerLoginServiceCreateTokenAllAdminsKeyFn(), queryFn: () => SimpleAuthManagerLoginService.createTokenAllAdmins() }); +export const ensureUseSimpleAuthManagerLoginServiceLoginAllAdminsData = (queryClient: QueryClient) => queryClient.ensureQueryData({ queryKey: Common.UseSimpleAuthManagerLoginServiceLoginAllAdminsKeyFn(), queryFn: () => SimpleAuthManagerLoginService.loginAllAdmins() }); diff --git a/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/index.ts b/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/index.ts index 987c8a4ea6dde..8e9b6922f00c8 100644 --- a/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/index.ts +++ b/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/index.ts @@ -1,4 +1,4 @@ -// generated with @7nohe/openapi-react-query-codegen@1.6.2 +// generated with @7nohe/openapi-react-query-codegen@1.6.2 export * from "./common"; export * from "./queries"; diff --git a/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/infiniteQueries.ts b/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/infiniteQueries.ts index 0baac0445f402..37298729b1133 100644 --- 
a/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/infiniteQueries.ts +++ b/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/infiniteQueries.ts @@ -1 +1,2 @@ -// generated with @7nohe/openapi-react-query-codegen@1.6.2 +// generated with @7nohe/openapi-react-query-codegen@1.6.2 + diff --git a/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/prefetch.ts b/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/prefetch.ts index a9e6112475c95..6801202bf314f 100644 --- a/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/prefetch.ts +++ b/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/prefetch.ts @@ -1,16 +1,7 @@ -// generated with @7nohe/openapi-react-query-codegen@1.6.2 -import { type QueryClient } from "@tanstack/react-query"; +// generated with @7nohe/openapi-react-query-codegen@1.6.2 +import { type QueryClient } from "@tanstack/react-query"; import { SimpleAuthManagerLoginService } from "../requests/services.gen"; import * as Common from "./common"; - -export const prefetchUseSimpleAuthManagerLoginServiceCreateTokenAllAdmins = (queryClient: QueryClient) => - queryClient.prefetchQuery({ - queryKey: Common.UseSimpleAuthManagerLoginServiceCreateTokenAllAdminsKeyFn(), - queryFn: () => SimpleAuthManagerLoginService.createTokenAllAdmins(), - }); -export const prefetchUseSimpleAuthManagerLoginServiceLoginAllAdmins = (queryClient: QueryClient) => - queryClient.prefetchQuery({ - queryKey: Common.UseSimpleAuthManagerLoginServiceLoginAllAdminsKeyFn(), - queryFn: () => SimpleAuthManagerLoginService.loginAllAdmins(), - }); +export const prefetchUseSimpleAuthManagerLoginServiceCreateTokenAllAdmins = (queryClient: QueryClient) => queryClient.prefetchQuery({ queryKey: Common.UseSimpleAuthManagerLoginServiceCreateTokenAllAdminsKeyFn(), queryFn: () => SimpleAuthManagerLoginService.createTokenAllAdmins() }); +export const prefetchUseSimpleAuthManagerLoginServiceLoginAllAdmins = (queryClient: QueryClient) => queryClient.prefetchQuery({ queryKey: Common.UseSimpleAuthManagerLoginServiceLoginAllAdminsKeyFn(), queryFn: () => SimpleAuthManagerLoginService.loginAllAdmins() }); diff --git a/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/queries.ts b/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/queries.ts index 316aa8d02ce0e..09d1142c9a1f7 100644 --- a/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/queries.ts +++ b/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/queries.ts @@ -1,91 +1,18 @@ -// generated with @7nohe/openapi-react-query-codegen@1.6.2 -import { UseMutationOptions, UseQueryOptions, useMutation, useQuery } from "@tanstack/react-query"; +// generated with @7nohe/openapi-react-query-codegen@1.6.2 +import { UseMutationOptions, UseQueryOptions, useMutation, useQuery } from "@tanstack/react-query"; import { SimpleAuthManagerLoginService } from "../requests/services.gen"; import { LoginBody } from "../requests/types.gen"; import * as Common from "./common"; - -export const useSimpleAuthManagerLoginServiceCreateTokenAllAdmins = < - TData = Common.SimpleAuthManagerLoginServiceCreateTokenAllAdminsDefaultResponse, - TError = unknown, - TQueryKey extends Array = unknown[], ->( - queryKey?: TQueryKey, - options?: Omit, "queryKey" | "queryFn">, -) => - useQuery({ - queryKey: 
Common.UseSimpleAuthManagerLoginServiceCreateTokenAllAdminsKeyFn(queryKey), - queryFn: () => SimpleAuthManagerLoginService.createTokenAllAdmins() as TData, - ...options, - }); -export const useSimpleAuthManagerLoginServiceLoginAllAdmins = < - TData = Common.SimpleAuthManagerLoginServiceLoginAllAdminsDefaultResponse, - TError = unknown, - TQueryKey extends Array = unknown[], ->( - queryKey?: TQueryKey, - options?: Omit, "queryKey" | "queryFn">, -) => - useQuery({ - queryKey: Common.UseSimpleAuthManagerLoginServiceLoginAllAdminsKeyFn(queryKey), - queryFn: () => SimpleAuthManagerLoginService.loginAllAdmins() as TData, - ...options, - }); -export const useSimpleAuthManagerLoginServiceCreateToken = < - TData = Common.SimpleAuthManagerLoginServiceCreateTokenMutationResult, - TError = unknown, - TContext = unknown, ->( - options?: Omit< - UseMutationOptions< - TData, - TError, - { - requestBody: LoginBody; - }, - TContext - >, - "mutationFn" - >, -) => - useMutation< - TData, - TError, - { - requestBody: LoginBody; - }, - TContext - >({ - mutationFn: ({ requestBody }) => - SimpleAuthManagerLoginService.createToken({ requestBody }) as unknown as Promise, - ...options, - }); -export const useSimpleAuthManagerLoginServiceCreateTokenCli = < - TData = Common.SimpleAuthManagerLoginServiceCreateTokenCliMutationResult, - TError = unknown, - TContext = unknown, ->( - options?: Omit< - UseMutationOptions< - TData, - TError, - { - requestBody: LoginBody; - }, - TContext - >, - "mutationFn" - >, -) => - useMutation< - TData, - TError, - { - requestBody: LoginBody; - }, - TContext - >({ - mutationFn: ({ requestBody }) => - SimpleAuthManagerLoginService.createTokenCli({ requestBody }) as unknown as Promise, - ...options, - }); +export const useSimpleAuthManagerLoginServiceCreateTokenAllAdmins = = unknown[]>(queryKey?: TQueryKey, options?: Omit, "queryKey" | "queryFn">) => useQuery({ queryKey: Common.UseSimpleAuthManagerLoginServiceCreateTokenAllAdminsKeyFn(queryKey), queryFn: () => SimpleAuthManagerLoginService.createTokenAllAdmins() as TData, ...options }); +export const useSimpleAuthManagerLoginServiceLoginAllAdmins = = unknown[]>(queryKey?: TQueryKey, options?: Omit, "queryKey" | "queryFn">) => useQuery({ queryKey: Common.UseSimpleAuthManagerLoginServiceLoginAllAdminsKeyFn(queryKey), queryFn: () => SimpleAuthManagerLoginService.loginAllAdmins() as TData, ...options }); +export const useSimpleAuthManagerLoginServiceCreateToken = (options?: Omit, "mutationFn">) => useMutation({ mutationFn: ({ requestBody }) => SimpleAuthManagerLoginService.createToken({ requestBody }) as unknown as Promise, ...options }); +export const useSimpleAuthManagerLoginServiceCreateTokenCli = (options?: Omit, "mutationFn">) => useMutation({ mutationFn: ({ requestBody }) => SimpleAuthManagerLoginService.createTokenCli({ requestBody }) as unknown as Promise, ...options }); diff --git a/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/suspense.ts b/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/suspense.ts index de11487464fe4..79b01bc72cb32 100644 --- a/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/suspense.ts +++ b/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/queries/suspense.ts @@ -1,32 +1,7 @@ -// generated with @7nohe/openapi-react-query-codegen@1.6.2 -import { UseQueryOptions, useSuspenseQuery } from "@tanstack/react-query"; +// generated with @7nohe/openapi-react-query-codegen@1.6.2 +import { 
UseQueryOptions, useSuspenseQuery } from "@tanstack/react-query"; import { SimpleAuthManagerLoginService } from "../requests/services.gen"; import * as Common from "./common"; - -export const useSimpleAuthManagerLoginServiceCreateTokenAllAdminsSuspense = < - TData = Common.SimpleAuthManagerLoginServiceCreateTokenAllAdminsDefaultResponse, - TError = unknown, - TQueryKey extends Array = unknown[], ->( - queryKey?: TQueryKey, - options?: Omit, "queryKey" | "queryFn">, -) => - useSuspenseQuery({ - queryKey: Common.UseSimpleAuthManagerLoginServiceCreateTokenAllAdminsKeyFn(queryKey), - queryFn: () => SimpleAuthManagerLoginService.createTokenAllAdmins() as TData, - ...options, - }); -export const useSimpleAuthManagerLoginServiceLoginAllAdminsSuspense = < - TData = Common.SimpleAuthManagerLoginServiceLoginAllAdminsDefaultResponse, - TError = unknown, - TQueryKey extends Array = unknown[], ->( - queryKey?: TQueryKey, - options?: Omit, "queryKey" | "queryFn">, -) => - useSuspenseQuery({ - queryKey: Common.UseSimpleAuthManagerLoginServiceLoginAllAdminsKeyFn(queryKey), - queryFn: () => SimpleAuthManagerLoginService.loginAllAdmins() as TData, - ...options, - }); +export const useSimpleAuthManagerLoginServiceCreateTokenAllAdminsSuspense = = unknown[]>(queryKey?: TQueryKey, options?: Omit, "queryKey" | "queryFn">) => useSuspenseQuery({ queryKey: Common.UseSimpleAuthManagerLoginServiceCreateTokenAllAdminsKeyFn(queryKey), queryFn: () => SimpleAuthManagerLoginService.createTokenAllAdmins() as TData, ...options }); +export const useSimpleAuthManagerLoginServiceLoginAllAdminsSuspense = = unknown[]>(queryKey?: TQueryKey, options?: Omit, "queryKey" | "queryFn">) => useSuspenseQuery({ queryKey: Common.UseSimpleAuthManagerLoginServiceLoginAllAdminsKeyFn(queryKey), queryFn: () => SimpleAuthManagerLoginService.loginAllAdmins() as TData, ...options }); diff --git a/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/package.json b/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/package.json index 61e99e7773143..451b1939ffcdf 100644 --- a/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/package.json +++ b/airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/package.json @@ -10,7 +10,7 @@ "lint:fix": "eslint --fix && tsc --p tsconfig.app.json", "format": "pnpm prettier --write .", "preview": "vite preview", - "codegen": "openapi-rq -i \"../openapi/v1-simple-auth-manager-generated.yaml\" -c axios --format prettier -o openapi-gen --operationId", + "codegen": "openapi-rq -i \"../openapi/v2-simple-auth-manager-generated.yaml\" -c axios --format prettier -o openapi-gen --operationId", "test": "vitest run", "coverage": "vitest run --coverage" }, diff --git a/airflow-core/src/airflow/api_fastapi/common/exceptions.py b/airflow-core/src/airflow/api_fastapi/common/exceptions.py index 061eec55d3d84..39909b7a46395 100644 --- a/airflow-core/src/airflow/api_fastapi/common/exceptions.py +++ b/airflow-core/src/airflow/api_fastapi/common/exceptions.py @@ -17,6 +17,8 @@ from __future__ import annotations +import logging +import traceback from abc import ABC, abstractmethod from enum import Enum from typing import Generic, TypeVar @@ -24,8 +26,13 @@ from fastapi import HTTPException, Request, status from sqlalchemy.exc import IntegrityError +from airflow.configuration import conf +from airflow.utils.strings import get_random_string + T = TypeVar("T", bound=Exception) +log = logging.getLogger(__name__) + class BaseErrorHandler(Generic[T], ABC): """Base class for error handlers.""" @@ -61,12 
+68,28 @@ def __init__(self): def exception_handler(self, request: Request, exc: IntegrityError): """Handle IntegrityError exception.""" if self._is_dialect_matched(exc): + exception_id = get_random_string() + stacktrace = "" + for tb in traceback.format_tb(exc.__traceback__): + stacktrace += tb + + log_message = f"Error with id {exception_id}\n{stacktrace}" + log.error(log_message) + if conf.get("api", "expose_stacktrace") == "True": + message = log_message + else: + message = ( + "Serious error when handling your request. Check logs for more details - " + f"you will find it in api server when you look for ID {exception_id}" + ) + raise HTTPException( status_code=status.HTTP_409_CONFLICT, detail={ "reason": "Unique constraint violation", "statement": str(exc.statement), "orig_error": str(exc.orig), + "message": message, }, ) diff --git a/airflow-core/src/airflow/api_fastapi/common/parameters.py b/airflow-core/src/airflow/api_fastapi/common/parameters.py index 141447488c26d..4219bb66284af 100644 --- a/airflow-core/src/airflow/api_fastapi/common/parameters.py +++ b/airflow-core/src/airflow/api_fastapi/common/parameters.py @@ -37,7 +37,7 @@ from fastapi import Depends, HTTPException, Query, status from pendulum.parsing.exceptions import ParserError from pydantic import AfterValidator, BaseModel, NonNegativeInt -from sqlalchemy import Column, and_, case, or_ +from sqlalchemy import Column, and_, case, func, or_ from sqlalchemy.inspection import inspect from airflow.api_fastapi.core_api.base import OrmClause @@ -484,9 +484,12 @@ def depends_datetime( lower_bound: datetime | None = Query(alias=f"{filter_name}_gte", default=None), upper_bound: datetime | None = Query(alias=f"{filter_name}_lte", default=None), ) -> RangeFilter: + attr = getattr(model, attribute_name or filter_name) + if filter_name in ("start_date", "end_date"): + attr = func.coalesce(attr, func.now()) return RangeFilter( Range(lower_bound=lower_bound, upper_bound=upper_bound), - getattr(model, attribute_name or filter_name), + attr, ) return depends_datetime @@ -601,7 +604,7 @@ def _transform_ti_states(states: list[str] | None) -> list[TaskInstanceState | N return None try: - return [None if s in ("none", None) else TaskInstanceState(s) for s in states] + return [None if s in ("no_status", "none", None) else TaskInstanceState(s) for s in states] except ValueError: raise HTTPException( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, diff --git a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/connections.py b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/connections.py index fee330e1fd1d6..f2ac1f7a9403a 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/connections.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/connections.py @@ -17,15 +17,12 @@ from __future__ import annotations -import json from collections import abc from typing import Annotated -from pydantic import Field, field_validator -from pydantic_core.core_schema import ValidationInfo +from pydantic import Field from airflow.api_fastapi.core_api.base import BaseModel, StrictBaseModel -from airflow.sdk.execution_time.secrets_masker import redact # Response Models @@ -42,26 +39,6 @@ class ConnectionResponse(BaseModel): password: str | None extra: str | None - @field_validator("password", mode="after") - @classmethod - def redact_password(cls, v: str | None, field_info: ValidationInfo) -> str | None: - if v is None: - return None - return redact(v, field_info.field_name) - - @field_validator("extra", 
mode="before") - @classmethod - def redact_extra(cls, v: str | None) -> str | None: - if v is None: - return None - try: - extra_dict = json.loads(v) - redacted_dict = redact(extra_dict) - return json.dumps(redacted_dict) - except json.JSONDecodeError: - # we can't redact fields in an unstructured `extra` - return v - class ConnectionCollectionResponse(BaseModel): """Connection Collection serializer for responses.""" diff --git a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dags.py b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dags.py index c8db6825a57c4..e461f9c62a638 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dags.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/dags.py @@ -91,7 +91,7 @@ def get_owners(cls, v: Any) -> list[str] | None: if v is None: return [] if isinstance(v, str): - return v.split(",") + return [x.strip() for x in v.split(",")] return v @field_validator("timetable_summary", mode="before") diff --git a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/ui/common.py b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/ui/common.py index cc4d7913b2244..0f315326194e5 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/ui/common.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/ui/common.py @@ -17,9 +17,14 @@ from __future__ import annotations +from datetime import datetime from typing import Generic, Literal, TypeVar +from pydantic import computed_field + from airflow.api_fastapi.core_api.base import BaseModel +from airflow.utils.state import TaskInstanceState +from airflow.utils.types import DagRunType class BaseEdgeResponse(BaseModel): @@ -52,8 +57,46 @@ class BaseNodeResponse(BaseModel): N = TypeVar("N", bound=BaseNodeResponse) +class GridNodeResponse(BaseModel): + """Base Node serializer for responses.""" + + id: str + label: str + children: list[GridNodeResponse] | None = None + is_mapped: bool | None + setup_teardown_type: Literal["setup", "teardown"] | None = None + + +class GridRunsResponse(BaseModel): + """Base Node serializer for responses.""" + + dag_id: str + run_id: str + queued_at: datetime | None + start_date: datetime | None + end_date: datetime | None + run_after: datetime + state: TaskInstanceState | None + run_type: DagRunType + + @computed_field + def duration(self) -> int | None: + if self.start_date and self.end_date: + return (self.end_date - self.start_date).seconds + return None + + class BaseGraphResponse(BaseModel, Generic[E, N]): """Base Graph serializer for responses.""" edges: list[E] nodes: list[N] + + +class LatestRunResponse(BaseModel): + """Base Node serializer for responses.""" + + id: int + dag_id: str + run_id: str + run_after: datetime diff --git a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/ui/grid.py b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/ui/grid.py index 822eb6f3e1a89..48ea0ece79495 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/datamodels/ui/grid.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/datamodels/ui/grid.py @@ -21,7 +21,6 @@ from pydantic import BaseModel, Field -from airflow.api_fastapi.core_api.datamodels.ui.structure import StructureDataResponse from airflow.utils.state import DagRunState, TaskInstanceState from airflow.utils.types import DagRunType @@ -40,6 +39,13 @@ class GridTaskInstanceSummary(BaseModel): note: str | None +class LightGridTaskInstanceSummary(BaseModel): + """Task Instance Summary model for the Grid UI.""" + + task_id: str + state: 
TaskInstanceState | None + + class GridDAGRunwithTIs(BaseModel): """DAG Run model for the Grid UI.""" @@ -57,8 +63,15 @@ class GridDAGRunwithTIs(BaseModel): task_instances: list[GridTaskInstanceSummary] +class GridTISummaries(BaseModel): + """DAG Run model for the Grid UI.""" + + run_id: str + dag_id: str + task_instances: list[LightGridTaskInstanceSummary] + + class GridResponse(BaseModel): """Response model for the Grid UI.""" dag_runs: list[GridDAGRunwithTIs] - structure: StructureDataResponse diff --git a/airflow-core/src/airflow/api_fastapi/core_api/openapi/_private_ui.yaml b/airflow-core/src/airflow/api_fastapi/core_api/openapi/_private_ui.yaml index e18ef75ed02db..5814f7e4de894 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/openapi/_private_ui.yaml +++ b/airflow-core/src/airflow/api_fastapi/core_api/openapi/_private_ui.yaml @@ -589,6 +589,293 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' + /ui/grid/structure/{dag_id}: + get: + tags: + - Grid + summary: Get Dag Structure + description: Return dag structure for grid view. + operationId: get_dag_structure + security: + - OAuth2PasswordBearer: [] + parameters: + - name: dag_id + in: path + required: true + schema: + type: string + title: Dag Id + - name: offset + in: query + required: false + schema: + type: integer + minimum: 0 + default: 0 + title: Offset + - name: limit + in: query + required: false + schema: + type: integer + minimum: 0 + default: 50 + title: Limit + - name: order_by + in: query + required: false + schema: + type: string + default: id + title: Order By + - name: run_after_gte + in: query + required: false + schema: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Run After Gte + - name: run_after_lte + in: query + required: false + schema: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Run After Lte + responses: + '200': + description: Successful Response + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/GridNodeResponse' + title: Response Get Dag Structure + '400': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Bad Request + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Not Found + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /ui/grid/runs/{dag_id}: + get: + tags: + - Grid + summary: Get Grid Runs + description: Get info about a run for the grid. 
+ operationId: get_grid_runs + security: + - OAuth2PasswordBearer: [] + parameters: + - name: dag_id + in: path + required: true + schema: + type: string + title: Dag Id + - name: offset + in: query + required: false + schema: + type: integer + minimum: 0 + default: 0 + title: Offset + - name: limit + in: query + required: false + schema: + type: integer + minimum: 0 + default: 50 + title: Limit + - name: order_by + in: query + required: false + schema: + type: string + default: id + title: Order By + - name: run_after_gte + in: query + required: false + schema: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Run After Gte + - name: run_after_lte + in: query + required: false + schema: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Run After Lte + responses: + '200': + description: Successful Response + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/GridRunsResponse' + title: Response Get Grid Runs + '400': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Bad Request + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Not Found + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /ui/grid/ti_summaries/{dag_id}/{run_id}: + get: + tags: + - Grid + summary: Get Grid Ti Summaries + description: 'Get states for TIs / "groups" of TIs. + + + Essentially this is to know what color to put in the squares in the grid. + + + The tricky part here is that we aggregate the state for groups and mapped + tasks. + + + We don''t add all the TIs for mapped TIs -- we only add one entry for the + mapped task and + + its state is an aggregate of its TI states. + + + And for task groups, we add a "task" for that which is not really a task but + is just + + an entry that represents the group (so that we can show a filled in box when + the group + + is not expanded) and its state is an agg of those within it.' + operationId: get_grid_ti_summaries + security: + - OAuth2PasswordBearer: [] + parameters: + - name: dag_id + in: path + required: true + schema: + type: string + title: Dag Id + - name: run_id + in: path + required: true + schema: + type: string + title: Run Id + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/GridTISummaries' + '400': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Bad Request + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Not Found + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /ui/grid/latest_run/{dag_id}: + get: + tags: + - Grid + summary: Get Latest Run + description: 'Get information about the latest dag run by run_after. + + + This is used by the UI to figure out if it needs to rerun queries and resume + auto refresh.' 
+ operationId: get_latest_run + security: + - OAuth2PasswordBearer: [] + parameters: + - name: dag_id + in: path + required: true + schema: + type: string + title: Dag Id + responses: + '200': + description: Successful Response + content: + application/json: + schema: + anyOf: + - $ref: '#/components/schemas/LatestRunResponse' + - type: 'null' + title: Response Get Latest Run + '400': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Bad Request + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Not Found + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' components: schemas: BackfillCollectionResponse: @@ -1410,6 +1697,41 @@ components: - task_instances title: GridDAGRunwithTIs description: DAG Run model for the Grid UI. + GridNodeResponse: + properties: + id: + type: string + title: Id + label: + type: string + title: Label + children: + anyOf: + - items: + $ref: '#/components/schemas/GridNodeResponse' + type: array + - type: 'null' + title: Children + is_mapped: + anyOf: + - type: boolean + - type: 'null' + title: Is Mapped + setup_teardown_type: + anyOf: + - type: string + enum: + - setup + - teardown + - type: 'null' + title: Setup Teardown Type + type: object + required: + - id + - label + - is_mapped + title: GridNodeResponse + description: Base Node serializer for responses. GridResponse: properties: dag_runs: @@ -1417,14 +1739,86 @@ components: $ref: '#/components/schemas/GridDAGRunwithTIs' type: array title: Dag Runs - structure: - $ref: '#/components/schemas/StructureDataResponse' type: object required: - dag_runs - - structure title: GridResponse description: Response model for the Grid UI. + GridRunsResponse: + properties: + dag_id: + type: string + title: Dag Id + run_id: + type: string + title: Run Id + queued_at: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Queued At + start_date: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Start Date + end_date: + anyOf: + - type: string + format: date-time + - type: 'null' + title: End Date + run_after: + type: string + format: date-time + title: Run After + state: + anyOf: + - $ref: '#/components/schemas/TaskInstanceState' + - type: 'null' + run_type: + $ref: '#/components/schemas/DagRunType' + duration: + anyOf: + - type: integer + - type: 'null' + title: Duration + readOnly: true + type: object + required: + - dag_id + - run_id + - queued_at + - start_date + - end_date + - run_after + - state + - run_type + - duration + title: GridRunsResponse + description: Base Node serializer for responses. + GridTISummaries: + properties: + run_id: + type: string + title: Run Id + dag_id: + type: string + title: Dag Id + task_instances: + items: + $ref: '#/components/schemas/LightGridTaskInstanceSummary' + type: array + title: Task Instances + type: object + required: + - run_id + - dag_id + - task_instances + title: GridTISummaries + description: DAG Run model for the Grid UI. GridTaskInstanceSummary: properties: task_id: @@ -1520,6 +1914,44 @@ components: - task_instance_states title: HistoricalMetricDataResponse description: Historical Metric Data serializer for responses. 
+ LatestRunResponse: + properties: + id: + type: integer + title: Id + dag_id: + type: string + title: Dag Id + run_id: + type: string + title: Run Id + run_after: + type: string + format: date-time + title: Run After + type: object + required: + - id + - dag_id + - run_id + - run_after + title: LatestRunResponse + description: Base Node serializer for responses. + LightGridTaskInstanceSummary: + properties: + task_id: + type: string + title: Task Id + state: + anyOf: + - $ref: '#/components/schemas/TaskInstanceState' + - type: 'null' + type: object + required: + - task_id + - state + title: LightGridTaskInstanceSummary + description: Task Instance Summary model for the Grid UI. MenuItem: type: string enum: diff --git a/airflow-core/src/airflow/api_fastapi/core_api/openapi/v1-rest-api-generated.yaml b/airflow-core/src/airflow/api_fastapi/core_api/openapi/v2-rest-api-generated.yaml similarity index 100% rename from airflow-core/src/airflow/api_fastapi/core_api/openapi/v1-rest-api-generated.yaml rename to airflow-core/src/airflow/api_fastapi/core_api/openapi/v2-rest-api-generated.yaml diff --git a/airflow-core/src/airflow/api_fastapi/core_api/routes/public/extra_links.py b/airflow-core/src/airflow/api_fastapi/core_api/routes/public/extra_links.py index d6d9b04b9ef75..f21be70388039 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/routes/public/extra_links.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/routes/public/extra_links.py @@ -77,7 +77,7 @@ def get_extra_links( if not ti: raise HTTPException( status.HTTP_404_NOT_FOUND, - f"DAG Run with ID = {dag_run_id} not found", + "TaskInstance not found", ) all_extra_link_pairs = ( diff --git a/airflow-core/src/airflow/api_fastapi/core_api/routes/public/log.py b/airflow-core/src/airflow/api_fastapi/core_api/routes/public/log.py index 05313e2b69b5a..de7b327b6a4f1 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/routes/public/log.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/routes/public/log.py @@ -112,6 +112,7 @@ def get_log( TaskInstance.dag_id == dag_id, TaskInstance.run_id == dag_run_id, TaskInstance.map_index == map_index, + TaskInstance.try_number == try_number, ) .join(TaskInstance.dag_run) .options(joinedload(TaskInstance.trigger).joinedload(Trigger.triggerer_job)) diff --git a/airflow-core/src/airflow/api_fastapi/core_api/routes/ui/grid.py b/airflow-core/src/airflow/api_fastapi/core_api/routes/ui/grid.py index 09409335c2c09..ce1a582a511f7 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/routes/ui/grid.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/routes/ui/grid.py @@ -42,23 +42,35 @@ datetime_range_filter_factory, ) from airflow.api_fastapi.common.router import AirflowRouter +from airflow.api_fastapi.core_api.datamodels.ui.common import ( + GridNodeResponse, + GridRunsResponse, + LatestRunResponse, +) from airflow.api_fastapi.core_api.datamodels.ui.grid import ( GridDAGRunwithTIs, GridResponse, + GridTISummaries, ) from airflow.api_fastapi.core_api.openapi.exceptions import create_openapi_http_exception_doc from airflow.api_fastapi.core_api.security import requires_access_dag from airflow.api_fastapi.core_api.services.ui.grid import ( + _find_aggregates, + _merge_node_dicts, fill_task_instance_summaries, get_child_task_map, - get_combined_structure, - get_structure_from_dag, get_task_group_map, ) -from airflow.models import DagRun, TaskInstance from airflow.models.dag_version import DagVersion +from airflow.models.dagrun import DagRun +from airflow.models.serialized_dag import 
SerializedDagModel +from airflow.models.taskinstance import TaskInstance from airflow.models.taskinstancehistory import TaskInstanceHistory from airflow.utils.state import TaskInstanceState +from airflow.utils.task_group import ( + get_task_group_children_getter, + task_group_to_dict_grid, +) log = structlog.get_logger(logger_name=__name__) grid_router = AirflowRouter(prefix="/grid", tags=["Grid"]) @@ -71,6 +83,7 @@ Depends(requires_access_dag(method="GET", access_entity=DagAccessEntity.TASK_INSTANCE)), Depends(requires_access_dag(method="GET", access_entity=DagAccessEntity.RUN)), ], + response_model_exclude_none=True, ) def grid_data( dag_id: str, @@ -124,11 +137,9 @@ def grid_data( ) dag_runs = list(session.scalars(dag_runs_select_filter).unique()) - # Check if there are any DAG Runs with given criteria to eliminate unnecessary queries/errors if not dag_runs: - structure = get_structure_from_dag(dag=dag) - return GridResponse(dag_runs=[], structure=structure) + return GridResponse(dag_runs=[]) # Retrieve, sort and encode the Task Instances tis_of_dag_runs, _ = paginated_select( @@ -257,8 +268,324 @@ def grid_data( ) for dag_run in dag_runs ] + return GridResponse(dag_runs=grid_dag_runs) + + +def _get_latest_serdag(dag_id, session): + serdag = session.scalar( + select(SerializedDagModel) + .where( + SerializedDagModel.dag_id == dag_id, + ) + .order_by(SerializedDagModel.id.desc()) + .limit(1) + ) + if not serdag: + raise HTTPException( + status.HTTP_404_NOT_FOUND, + f"Dag with id {dag_id} was not found", + ) + return serdag + + +def _get_serdag(dag_id, dag_version_id, session) -> SerializedDagModel | None: + # this is a simplification - we account for structure based on the first task + version = session.scalar(select(DagVersion).where(DagVersion.id == dag_version_id)) + if not version: + version = session.scalar( + select(DagVersion) + .where( + DagVersion.dag_id == dag_id, + ) + .order_by(DagVersion.id) # ascending cus this is mostly for pre-3.0 upgrade + .limit(1) + ) + if not (serdag := version.serialized_dag): + log.error( + "No serialized dag found", + dag_id=dag_id, + version_id=version.id, + version_number=version.version_number, + ) + return serdag + + +@grid_router.get( + "/structure/{dag_id}", + responses=create_openapi_http_exception_doc([status.HTTP_400_BAD_REQUEST, status.HTTP_404_NOT_FOUND]), + dependencies=[ + Depends(requires_access_dag(method="GET", access_entity=DagAccessEntity.TASK_INSTANCE)), + Depends(requires_access_dag(method="GET", access_entity=DagAccessEntity.RUN)), + ], + response_model_exclude_none=True, +) +def get_dag_structure( + dag_id: str, + session: SessionDep, + offset: QueryOffset, + limit: QueryLimit, + order_by: Annotated[ + SortParam, + Depends(SortParam(["run_after", "logical_date", "start_date", "end_date"], DagRun).dynamic_depends()), + ], + run_after: Annotated[RangeFilter, Depends(datetime_range_filter_factory("run_after", DagRun))], +) -> list[GridNodeResponse]: + """Return dag structure for grid view.""" + latest_serdag = _get_latest_serdag(dag_id, session) + latest_dag = latest_serdag.dag - flat_tis = itertools.chain.from_iterable(tis_by_run_id.values()) - structure = get_combined_structure(task_instances=flat_tis, session=session) + # Retrieve, sort the previous DAG Runs + base_query = select(DagRun.id).where(DagRun.dag_id == dag_id) + # This comparison is to fall back to DAG timetable when no order_by is provided + if order_by.value == order_by.get_primary_key_string(): + ordering = list(latest_dag.timetable.run_ordering) + order_by = 
SortParam( + allowed_attrs=ordering, + model=DagRun, + ).set_value(ordering[0]) + dag_runs_select_filter, _ = paginated_select( + statement=base_query, + order_by=order_by, + offset=offset, + filters=[run_after], + limit=limit, + ) + run_ids = list(session.scalars(dag_runs_select_filter)) + + task_group_sort = get_task_group_children_getter() + if not run_ids: + nodes = [task_group_to_dict_grid(x) for x in task_group_sort(latest_dag.task_group)] + return nodes - return GridResponse(dag_runs=grid_dag_runs, structure=structure) + serdags = session.scalars( + select(SerializedDagModel).where( + SerializedDagModel.dag_version_id.in_( + select(TaskInstance.dag_version_id) + .join(TaskInstance.dag_run) + .where( + DagRun.id.in_(run_ids), + SerializedDagModel.id != latest_serdag.id, + ) + ) + ) + ) + merged_nodes: list[GridNodeResponse] = [] + dags = [latest_dag] + for serdag in serdags: + if serdag: + dags.append(serdag.dag) + for dag in dags: + nodes = [task_group_to_dict_grid(x) for x in task_group_sort(dag.task_group)] + _merge_node_dicts(merged_nodes, nodes) + + return merged_nodes + + +@grid_router.get( + "/runs/{dag_id}", + responses=create_openapi_http_exception_doc( + [ + status.HTTP_400_BAD_REQUEST, + status.HTTP_404_NOT_FOUND, + ] + ), + dependencies=[ + Depends( + requires_access_dag( + method="GET", + access_entity=DagAccessEntity.TASK_INSTANCE, + ) + ), + Depends( + requires_access_dag( + method="GET", + access_entity=DagAccessEntity.RUN, + ) + ), + ], + response_model_exclude_none=True, +) +def get_grid_runs( + dag_id: str, + session: SessionDep, + offset: QueryOffset, + limit: QueryLimit, + order_by: Annotated[ + SortParam, + Depends( + SortParam( + [ + "run_after", + "logical_date", + "start_date", + "end_date", + ], + DagRun, + ).dynamic_depends() + ), + ], + run_after: Annotated[RangeFilter, Depends(datetime_range_filter_factory("run_after", DagRun))], +) -> list[GridRunsResponse]: + """Get info about a run for the grid.""" + # Retrieve, sort the previous DAG Runs + base_query = select( + DagRun.dag_id, + DagRun.run_id, + DagRun.queued_at, + DagRun.start_date, + DagRun.end_date, + DagRun.run_after, + DagRun.state, + DagRun.run_type, + ).where(DagRun.dag_id == dag_id) + + # This comparison is to fall back to DAG timetable when no order_by is provided + if order_by.value == order_by.get_primary_key_string(): + latest_serdag = _get_latest_serdag(dag_id, session) + latest_dag = latest_serdag.dag + ordering = list(latest_dag.timetable.run_ordering) + order_by = SortParam( + allowed_attrs=ordering, + model=DagRun, + ).set_value(ordering[0]) + dag_runs_select_filter, _ = paginated_select( + statement=base_query, + order_by=order_by, + offset=offset, + filters=[run_after], + limit=limit, + ) + return session.execute(dag_runs_select_filter) + + +@grid_router.get( + "/ti_summaries/{dag_id}/{run_id}", + responses=create_openapi_http_exception_doc( + [ + status.HTTP_400_BAD_REQUEST, + status.HTTP_404_NOT_FOUND, + ] + ), + dependencies=[ + Depends( + requires_access_dag( + method="GET", + access_entity=DagAccessEntity.TASK_INSTANCE, + ) + ), + Depends( + requires_access_dag( + method="GET", + access_entity=DagAccessEntity.RUN, + ) + ), + ], + response_model_exclude_none=True, +) +def get_grid_ti_summaries( + dag_id: str, + run_id: str, + session: SessionDep, +) -> GridTISummaries: + """ + Get states for TIs / "groups" of TIs. + + Essentially this is to know what color to put in the squares in the grid. + + The tricky part here is that we aggregate the state for groups and mapped tasks. 
+ + We don't add all the TIs for mapped TIs -- we only add one entry for the mapped task and + its state is an aggregate of its TI states. + + And for task groups, we add a "task" for that which is not really a task but is just + an entry that represents the group (so that we can show a filled in box when the group + is not expanded) and its state is an agg of those within it. + """ + tis_of_dag_runs, _ = paginated_select( + statement=( + select( + TaskInstance.task_id, + TaskInstance.state, + TaskInstance.dag_version_id, + ) + .where(TaskInstance.dag_id == dag_id) + .where( + TaskInstance.run_id == run_id, + ) + ), + filters=[], + order_by=SortParam(allowed_attrs=["task_id", "run_id"], model=TaskInstance).set_value("task_id"), + limit=None, + return_total_entries=False, + ) + task_instances = list(session.execute(tis_of_dag_runs)) + task_id_states = collections.defaultdict(list) + for ti in task_instances: + task_id_states[ti.task_id].append(ti.state) + + serdag = _get_serdag( + dag_id=dag_id, + dag_version_id=task_instances[0].dag_version_id, + session=session, + ) + if not serdag: + raise HTTPException(status.HTTP_404_NOT_FOUND, f"Dag with id {dag_id} was not found") + tis = list( + _find_aggregates( + node=serdag.dag.task_group, + parent_node=None, + ti_states=task_id_states, + ) + ) + + return { # type: ignore[return-value] + "run_id": run_id, + "dag_id": dag_id, + "task_instances": list(tis), + } + + +@grid_router.get( + "/latest_run/{dag_id}", + responses=create_openapi_http_exception_doc( + [ + status.HTTP_400_BAD_REQUEST, + status.HTTP_404_NOT_FOUND, + ] + ), + dependencies=[ + Depends( + requires_access_dag( + method="GET", + access_entity=DagAccessEntity.TASK_INSTANCE, + ) + ), + Depends( + requires_access_dag( + method="GET", + access_entity=DagAccessEntity.RUN, + ) + ), + ], + response_model_exclude_none=True, +) +def get_latest_run( + dag_id: str, + session: SessionDep, +) -> LatestRunResponse | None: + """ + Get information about the latest dag run by run_after. + + This is used by the UI to figure out if it needs to rerun queries and resume auto refresh. 
+ """ + return session.execute( + select( + DagRun.id, + DagRun.dag_id, + DagRun.run_id, + DagRun.run_after, + ) + .where(DagRun.dag_id == dag_id) + .order_by(DagRun.run_after.desc()) + .limit(1) + ).one_or_none() diff --git a/airflow-core/src/airflow/api_fastapi/core_api/routes/ui/structure.py b/airflow-core/src/airflow/api_fastapi/core_api/routes/ui/structure.py index 05fb79bd0bf29..738e4c6edf65f 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/routes/ui/structure.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/routes/ui/structure.py @@ -26,7 +26,10 @@ from airflow.api_fastapi.core_api.datamodels.ui.structure import StructureDataResponse from airflow.api_fastapi.core_api.openapi.exceptions import create_openapi_http_exception_doc from airflow.api_fastapi.core_api.security import requires_access_dag -from airflow.api_fastapi.core_api.services.ui.structure import get_upstream_assets +from airflow.api_fastapi.core_api.services.ui.structure import ( + bind_output_assets_to_tasks, + get_upstream_assets, +) from airflow.models.dag_version import DagVersion from airflow.models.serialized_dag import SerializedDagModel from airflow.utils.dag_edges import dag_edges @@ -119,7 +122,15 @@ def structure_data( elif ( dependency.target == dependency.dependency_type or dependency.source == dag_id ) and exit_node_ref: - end_edges.append({"source_id": exit_node_ref["id"], "target_id": dependency.node_id}) + end_edges.append( + { + "source_id": exit_node_ref["id"], + "target_id": dependency.node_id, + "resolved_from_alias": dependency.source.replace("asset-alias:", "", 1) + if dependency.source.startswith("asset-alias:") + else None, + } + ) # Add nodes nodes.append( @@ -135,8 +146,10 @@ def structure_data( asset_expression, entry_node_ref["id"] ) data["nodes"] += upstream_asset_nodes - data["edges"] = upstream_asset_edges + data["edges"] += upstream_asset_edges + + data["edges"] += start_edges + end_edges - data["edges"] += start_edges + edges + end_edges + bind_output_assets_to_tasks(data["edges"], serialized_dag, version_number, session) return StructureDataResponse(**data) diff --git a/airflow-core/src/airflow/api_fastapi/core_api/services/ui/grid.py b/airflow-core/src/airflow/api_fastapi/core_api/services/ui/grid.py index 346676e14cd48..a69cafb7bbbda 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/services/ui/grid.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/services/ui/grid.py @@ -18,6 +18,8 @@ from __future__ import annotations import contextlib +from collections import Counter +from collections.abc import Iterable from uuid import UUID import structlog @@ -309,3 +311,61 @@ def _get_node_by_id(nodes, node_id): if node["id"] == node_id: return node return {} + + +def _is_task_node_mapped_task_group(task_node: BaseOperator | MappedTaskGroup | TaskMap | None) -> bool: + """Check if the Task Node is a Mapped Task Group.""" + return type(task_node) is MappedTaskGroup + + +def agg_state(states): + states = Counter(states) + for state in state_priority: + if state in states: + return state + return "no_status" + + +def _find_aggregates( + node: TaskGroup | BaseOperator | MappedTaskGroup | TaskMap, + parent_node: TaskGroup | BaseOperator | MappedTaskGroup | TaskMap | None, + ti_states: dict[str, list[str]], +) -> Iterable[dict]: + """Recursively fill the Task Group Map.""" + node_id = node.node_id + parent_id = parent_node.node_id if parent_node else None + + if node is None: + return + + if isinstance(node, MappedOperator): + yield { + "task_id": node_id, + "type": 
"mapped_task", + "parent_id": parent_id, + "state": agg_state(ti_states[node_id]), + } + + return + if isinstance(node, TaskGroup): + states = [] + for child in get_task_group_children_getter()(node): + for child_node in _find_aggregates(node=child, parent_node=node, ti_states=ti_states): + states.append(child_node["state"]) + yield child_node + if node_id: + yield { + "task_id": node_id, + "type": "group", + "parent_id": parent_id, + "state": agg_state(states), + } + return + if isinstance(node, BaseOperator): + yield { + "task_id": node_id, + "type": "task", + "parent_id": parent_id, + "state": agg_state(ti_states[node_id]), + } + return diff --git a/airflow-core/src/airflow/api_fastapi/core_api/services/ui/structure.py b/airflow-core/src/airflow/api_fastapi/core_api/services/ui/structure.py index 128dc93b7706d..db3d1ba6deac4 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/services/ui/structure.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/services/ui/structure.py @@ -23,6 +23,16 @@ from __future__ import annotations +from collections import defaultdict + +from sqlalchemy import select +from sqlalchemy.orm import Session + +from airflow.models.asset import AssetAliasModel, AssetEvent +from airflow.models.dag_version import DagVersion +from airflow.models.dagrun import DagRun +from airflow.models.serialized_dag import SerializedDagModel + def get_upstream_assets( asset_expression: dict, entry_node_ref: str, level: int = 0 @@ -112,3 +122,64 @@ def get_upstream_assets( edges = edges + e return nodes, edges + + +def bind_output_assets_to_tasks( + edges: list[dict], serialized_dag: SerializedDagModel, version_number: int, session: Session +) -> None: + """ + Try to bind the downstream assets to the relevant task that produces them. + + This function will mutate the `edges` in place. + """ + # bind normal assets present in the `task_outlet_asset_references` + outlet_asset_references = serialized_dag.dag_model.task_outlet_asset_references + + downstream_asset_edges = [ + edge + for edge in edges + if edge["target_id"].startswith("asset:") and not edge.get("resolved_from_alias") + ] + + for edge in downstream_asset_edges: + # Try to attach the outlet assets to the relevant tasks + asset_id = int(edge["target_id"].replace("asset:", "", 1)) + outlet_asset_reference = next( + outlet_asset_reference + for outlet_asset_reference in outlet_asset_references + if outlet_asset_reference.asset_id == asset_id + ) + edge["source_id"] = outlet_asset_reference.task_id + + # bind assets resolved from aliases, they do not populate the `outlet_asset_references` + downstream_alias_resolved_edges = [ + edge for edge in edges if edge["target_id"].startswith("asset:") and edge.get("resolved_from_alias") + ] + + aliases_names = {edges["resolved_from_alias"] for edges in downstream_alias_resolved_edges} + + result = session.scalars( + select(AssetEvent) + .join(AssetEvent.source_aliases) + .join(AssetEvent.source_dag_run) + # That's a simplification, instead doing `version_number` in `DagRun.dag_versions`. 
+ .join(DagRun.created_dag_version) + .where(AssetEvent.source_aliases.any(AssetAliasModel.name.in_(aliases_names))) + .where(AssetEvent.source_dag_run.has(DagRun.dag_id == serialized_dag.dag_model.dag_id)) + .where(DagVersion.version_number == version_number) + ).unique() + + asset_id_to_task_ids = defaultdict(set) + for asset_event in result: + asset_id_to_task_ids[asset_event.asset_id].add(asset_event.source_task_id) + + for edge in downstream_alias_resolved_edges: + asset_id = int(edge["target_id"].replace("asset:", "", 1)) + task_ids = asset_id_to_task_ids.get(asset_id, set()) + + for index, task_id in enumerate(task_ids): + if index == 0: + edge["source_id"] = task_id + continue + edge_copy = {**edge, "source_id": task_id} + edges.append(edge_copy) diff --git a/airflow-core/src/airflow/cli/commands/connection_command.py b/airflow-core/src/airflow/cli/commands/connection_command.py index aace3f9c9aede..9d2358b029b3d 100644 --- a/airflow-core/src/airflow/cli/commands/connection_command.py +++ b/airflow-core/src/airflow/cli/commands/connection_command.py @@ -369,7 +369,7 @@ def connections_test(args) -> None: print(f"Retrieving connection: {args.conn_id!r}") try: - conn = BaseHook.get_connection(args.conn_id) + conn = Connection.get_connection_from_secrets(args.conn_id) except AirflowNotFoundException: console.print("[bold yellow]\nConnection not found.\n") raise SystemExit(1) diff --git a/airflow-core/src/airflow/config_templates/config.yml b/airflow-core/src/airflow/config_templates/config.yml index 948299dec0d5c..92134fa4bf1cc 100644 --- a/airflow-core/src/airflow/config_templates/config.yml +++ b/airflow-core/src/airflow/config_templates/config.yml @@ -1348,6 +1348,12 @@ api: type: string example: ~ default: "False" + expose_stacktrace: + description: Expose stacktrace in the web server + version_added: ~ + type: string + example: ~ + default: "False" base_url: description: | The base url of the API server. Airflow cannot guess what domain or CNAME you are using. @@ -1519,6 +1525,15 @@ workers: type: float example: ~ default: "90.0" + socket_cleanup_timeout: + description: | + Number of seconds to wait after a task process exits before forcibly closing any + remaining communication sockets. This helps prevent the task supervisor from hanging + indefinitely due to missed EOF signals. 
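
Both new options follow the usual config lookup pattern; a small sketch of how code might read them (section and option names taken from the entries above, fallbacks mirroring the declared defaults):

# Sketch: reading the new [api] and [workers] options via the standard
# Airflow config API. Fallback values mirror the defaults declared above.
from airflow.configuration import conf

expose_stacktrace = conf.getboolean("api", "expose_stacktrace", fallback=False)
socket_cleanup_timeout = conf.getfloat("workers", "socket_cleanup_timeout", fallback=60.0)
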
+ version_added: 3.0.3 + type: float + example: ~ + default: "60.0" api_auth: description: Settings relating to authentication on the Airflow APIs options: diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index 9760da617a1d9..a3538f1e29191 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -18,9 +18,10 @@ from typing import TYPE_CHECKING +from sqlalchemy import delete + from airflow.configuration import conf from airflow.exceptions import AirflowConfigException -from airflow.models.dag_version import DagVersion from airflow.models.dagbundle import DagBundleModel from airflow.utils.log.logging_mixin import LoggingMixin from airflow.utils.module_loading import import_string @@ -124,35 +125,20 @@ def parse_config(self) -> None: def sync_bundles_to_db(self, *, session: Session = NEW_SESSION) -> None: self.log.debug("Syncing DAG bundles to the database") stored = {b.name: b for b in session.query(DagBundleModel).all()} - active_bundle_names = set(self._bundle_config.keys()) - for name in active_bundle_names: + for name in self._bundle_config.keys(): if bundle := stored.pop(name, None): bundle.active = True else: session.add(DagBundleModel(name=name)) self.log.info("Added new DAG bundle %s to the database", name) - inactive_bundle_names = [] + for name, bundle in stored.items(): bundle.active = False - inactive_bundle_names.append(name) self.log.warning("DAG bundle %s is no longer found in config and has been disabled", name) + from airflow.models.errors import ParseImportError - if inactive_bundle_names and active_bundle_names: - new_bundle_name = sorted(active_bundle_names)[0] - updated_rows = ( - session.query(DagVersion) - .filter(DagVersion.bundle_name.in_(inactive_bundle_names)) - .update( - {DagVersion.bundle_name: new_bundle_name}, - synchronize_session=False, - ) - ) - - self.log.info( - "Updated %d DAG versions from inactive bundles to active bundle %s", - updated_rows, - new_bundle_name, - ) + session.execute(delete(ParseImportError).where(ParseImportError.bundle_name == name)) + self.log.info("Deleted import errors for bundle %s which is no longer configured", name) def get_bundle(self, name: str, version: str | None = None) -> BaseDagBundle: """ diff --git a/airflow-core/src/airflow/dag_processing/manager.py b/airflow-core/src/airflow/dag_processing/manager.py index c143e682608a6..c3b4f809411ce 100644 --- a/airflow-core/src/airflow/dag_processing/manager.py +++ b/airflow-core/src/airflow/dag_processing/manager.py @@ -77,6 +77,8 @@ from airflow.utils.sqlalchemy import prohibit_commit, with_row_locks if TYPE_CHECKING: + from socket import socket + from sqlalchemy.orm import Session from airflow.callbacks.callback_requests import CallbackRequest @@ -388,17 +390,18 @@ def _service_processor_sockets(self, timeout: float | None = 1.0): """ events = self.selector.select(timeout=timeout) for key, _ in events: - socket_handler = key.data + socket_handler, on_close = key.data # BrokenPipeError should be caught and treated as if the handler returned false, similar # to EOF case try: need_more = socket_handler(key.fileobj) - except BrokenPipeError: + except (BrokenPipeError, ConnectionResetError): need_more = False if not need_more: - self.selector.unregister(key.fileobj) - key.fileobj.close() # type: ignore[union-attr] + sock: socket = key.fileobj # type: ignore[assignment] + on_close(sock) + sock.close() def 
_queue_requested_files_for_parsing(self) -> None: """Queue any files requested for parsing as requested by users via UI/API.""" diff --git a/airflow-core/src/airflow/dag_processing/processor.py b/airflow-core/src/airflow/dag_processing/processor.py index 5f69082c758aa..011393f22c886 100644 --- a/airflow-core/src/airflow/dag_processing/processor.py +++ b/airflow-core/src/airflow/dag_processing/processor.py @@ -68,7 +68,6 @@ class DagFileParseRequest(BaseModel): bundle_path: Path """Passing bundle path around lets us figure out relative file path.""" - requests_fd: int callback_requests: list[CallbackRequest] = Field(default_factory=list) type: Literal["DagFileParseRequest"] = "DagFileParseRequest" @@ -102,18 +101,16 @@ class DagFileParsingResult(BaseModel): def _parse_file_entrypoint(): import structlog - from airflow.sdk.execution_time import task_runner + from airflow.sdk.execution_time import comms, task_runner # Parse DAG file, send JSON back up! - comms_decoder = task_runner.CommsDecoder[ToDagProcessor, ToManager]( - input=sys.stdin, - decoder=TypeAdapter[ToDagProcessor](ToDagProcessor), + comms_decoder = comms.CommsDecoder[ToDagProcessor, ToManager]( + body_decoder=TypeAdapter[ToDagProcessor](ToDagProcessor), ) - msg = comms_decoder.get_message() + msg = comms_decoder._get_response() if not isinstance(msg, DagFileParseRequest): raise RuntimeError(f"Required first message to be a DagFileParseRequest, it was {msg}") - comms_decoder.request_socket = os.fdopen(msg.requests_fd, "wb", buffering=0) task_runner.SUPERVISOR_COMMS = comms_decoder log = structlog.get_logger(logger_name="task") @@ -125,7 +122,7 @@ def _parse_file_entrypoint(): result = _parse_file(msg, log) if result is not None: - comms_decoder.send_request(log, result) + comms_decoder.send(result) def _parse_file(msg: DagFileParseRequest, log: FilteringBoundLogger) -> DagFileParsingResult | None: @@ -266,20 +263,18 @@ def _on_child_started( msg = DagFileParseRequest( file=os.fspath(path), bundle_path=bundle_path, - requests_fd=self._requests_fd, callback_requests=callbacks, ) - self.send_msg(msg) + self.send_msg(msg, request_id=0) - def _handle_request(self, msg: ToManager, log: FilteringBoundLogger) -> None: # type: ignore[override] + def _handle_request(self, msg: ToManager, log: FilteringBoundLogger, req_id: int) -> None: # type: ignore[override] from airflow.sdk.api.datamodels._generated import ConnectionResponse, VariableResponse resp: BaseModel | None = None dump_opts = {} if isinstance(msg, DagFileParsingResult): self.parsing_result = msg - return - if isinstance(msg, GetConnection): + elif isinstance(msg, GetConnection): conn = self.client.connections.get(msg.conn_id) if isinstance(conn, ConnectionResponse): conn_result = ConnectionResult.from_conn_response(conn) @@ -301,10 +296,16 @@ def _handle_request(self, msg: ToManager, log: FilteringBoundLogger) -> None: # resp = self.client.variables.delete(msg.key) else: log.error("Unhandled request", msg=msg) + self.send_msg( + None, + request_id=req_id, + error=ErrorResponse( + detail={"status_code": 400, "message": "Unhandled request"}, + ), + ) return - if resp: - self.send_msg(resp, **dump_opts) + self.send_msg(resp, request_id=req_id, error=None, **dump_opts) @property def is_ready(self) -> bool: @@ -312,7 +313,7 @@ def is_ready(self) -> bool: # Process still alive, def can't be finished yet return False - return self._num_open_sockets == 0 + return not self._open_sockets def wait(self) -> int: raise NotImplementedError(f"Don't call wait on {type(self).__name__} objects") 
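
The processor and trigger supervisors above now exchange length-prefixed, id-correlated frames over a single socket instead of newline-delimited JSON on separate pipes (hence `send_msg(msg, request_id=0)` and the removal of `requests_fd`). A rough, self-contained sketch of that framing idea follows; the 4-byte big-endian length prefix matches what `_aread_frame` expects, but the JSON body layout and field names here are illustrative assumptions, not the actual `_RequestFrame` wire format.

# Rough sketch of id-correlated, length-prefixed request/response framing over
# one socket. The frame body shown ({"id": ..., "body": ...}) is an assumption
# for illustration only.
import json
import socket

def send_frame(sock: socket.socket, frame: dict) -> None:
    payload = json.dumps(frame).encode()
    sock.sendall(len(payload).to_bytes(4, "big") + payload)

def _read_exactly(sock: socket.socket, n: int) -> bytes:
    buf = b""
    while len(buf) < n:
        chunk = sock.recv(n - len(buf))
        if not chunk:
            raise EOFError("socket closed mid-frame")
        buf += chunk
    return buf

def recv_frame(sock: socket.socket) -> dict:
    length = int.from_bytes(_read_exactly(sock, 4), "big")
    return json.loads(_read_exactly(sock, length))

supervisor, child = socket.socketpair()
# The child asks for a connection; the request id lets the supervisor route the reply
# back to the matching caller.
send_frame(child, {"id": 1, "body": {"type": "GetConnection", "conn_id": "my_conn"}})
req = recv_frame(supervisor)
send_frame(supervisor, {"id": req["id"], "body": {"type": "ConnectionResult", "conn_id": "my_conn"}})
resp = recv_frame(child)
assert resp["id"] == 1
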
diff --git a/airflow-core/src/airflow/example_dags/example_asset_alias.py b/airflow-core/src/airflow/example_dags/example_asset_alias.py index d0a6a20188a92..5c4df1aa09c35 100644 --- a/airflow-core/src/airflow/example_dags/example_asset_alias.py +++ b/airflow-core/src/airflow/example_dags/example_asset_alias.py @@ -32,6 +32,7 @@ from __future__ import annotations +# [START example_asset_alias] import pendulum from airflow.sdk import DAG, Asset, AssetAlias, task @@ -94,3 +95,4 @@ def consume_asset_event_from_asset_alias(*, inlet_events=None): print(event) consume_asset_event_from_asset_alias() +# [END example_asset_alias] diff --git a/airflow-core/src/airflow/example_dags/example_assets.py b/airflow-core/src/airflow/example_dags/example_assets.py index 2bb3cffc527f8..3ab372112585c 100644 --- a/airflow-core/src/airflow/example_dags/example_assets.py +++ b/airflow-core/src/airflow/example_dags/example_assets.py @@ -52,6 +52,7 @@ from __future__ import annotations +# [START asset_def] import pendulum from airflow.providers.standard.operators.bash import BashOperator @@ -59,9 +60,7 @@ from airflow.timetables.assets import AssetOrTimeSchedule from airflow.timetables.trigger import CronTriggerTimetable -# [START asset_def] dag1_asset = Asset("s3://dag1/output_1.txt", extra={"hi": "bye"}) -# [END asset_def] dag2_asset = Asset("s3://dag2/output_1.txt", extra={"hi": "bye"}) dag3_asset = Asset("s3://dag3/output_3.txt", extra={"hi": "bye"}) @@ -189,3 +188,4 @@ task_id="conditional_asset_and_time_based_timetable", bash_command="sleep 5", ) +# [END asset_def] diff --git a/airflow-core/src/airflow/example_dags/example_dag_decorator.py b/airflow-core/src/airflow/example_dags/example_dag_decorator.py index c1f5b39233231..5d1312a888e80 100644 --- a/airflow-core/src/airflow/example_dags/example_dag_decorator.py +++ b/airflow-core/src/airflow/example_dags/example_dag_decorator.py @@ -17,6 +17,7 @@ # under the License. 
from __future__ import annotations +# [START dag_decorator_usage] from typing import TYPE_CHECKING, Any import httpx @@ -43,7 +44,6 @@ def execute(self, context: Context): return httpx.get(self.url).json() -# [START dag_decorator_usage] @dag( schedule=None, start_date=pendulum.datetime(2021, 1, 1, tz="UTC"), diff --git a/airflow-core/src/airflow/example_dags/example_dynamic_task_mapping.py b/airflow-core/src/airflow/example_dags/example_dynamic_task_mapping.py index 9f4f45511cf04..750c3da1ec17b 100644 --- a/airflow-core/src/airflow/example_dags/example_dynamic_task_mapping.py +++ b/airflow-core/src/airflow/example_dags/example_dynamic_task_mapping.py @@ -19,6 +19,7 @@ from __future__ import annotations +# [START example_dynamic_task_mapping] from datetime import datetime from airflow.sdk import DAG, task @@ -56,3 +57,5 @@ def add_10(num): _get_nums = get_nums() _times_2 = times_2.expand(num=_get_nums) add_10.expand(num=_times_2) + +# [END example_dynamic_task_mapping] diff --git a/airflow-core/src/airflow/example_dags/example_setup_teardown_taskflow.py b/airflow-core/src/airflow/example_dags/example_setup_teardown_taskflow.py index e554b4f9cae89..8b68f85ef826d 100644 --- a/airflow-core/src/airflow/example_dags/example_setup_teardown_taskflow.py +++ b/airflow-core/src/airflow/example_dags/example_setup_teardown_taskflow.py @@ -19,6 +19,7 @@ from __future__ import annotations +# [START example_setup_teardown_taskflow] import pendulum from airflow.sdk import DAG, setup, task, task_group, teardown @@ -104,3 +105,4 @@ def inner_teardown(cluster_id): # and let's put section 1 inside the outer setup and teardown tasks section_1() +# [END example_setup_teardown_taskflow] diff --git a/airflow-core/src/airflow/example_dags/example_simplest_dag.py b/airflow-core/src/airflow/example_dags/example_simplest_dag.py index fad6f57950a9e..660f38c2e00e1 100644 --- a/airflow-core/src/airflow/example_dags/example_simplest_dag.py +++ b/airflow-core/src/airflow/example_dags/example_simplest_dag.py @@ -18,6 +18,7 @@ from __future__ import annotations +# [START simplest_dag] from airflow.sdk import dag, task @@ -30,4 +31,6 @@ def my_task(): my_task() +# [END simplest_dag] + example_simplest_dag() diff --git a/airflow-core/src/airflow/example_dags/example_task_group_decorator.py b/airflow-core/src/airflow/example_dags/example_task_group_decorator.py index 580b8bca5226a..5ed2a59ae3b64 100644 --- a/airflow-core/src/airflow/example_dags/example_task_group_decorator.py +++ b/airflow-core/src/airflow/example_dags/example_task_group_decorator.py @@ -19,12 +19,12 @@ from __future__ import annotations +# [START howto_task_group_decorator] import pendulum from airflow.sdk import DAG, task, task_group -# [START howto_task_group_decorator] # Creating Tasks @task def task_start(): diff --git a/airflow-core/src/airflow/example_dags/example_xcomargs.py b/airflow-core/src/airflow/example_dags/example_xcomargs.py index 6337cf482d98f..a64beb513baef 100644 --- a/airflow-core/src/airflow/example_dags/example_xcomargs.py +++ b/airflow-core/src/airflow/example_dags/example_xcomargs.py @@ -19,6 +19,7 @@ from __future__ import annotations +# [START example_xcomargs] import logging import pendulum @@ -63,3 +64,4 @@ def print_value(value, ts=None): xcom_args_b = print_value("second!") bash_op1 >> xcom_args_a >> xcom_args_b >> bash_op2 +# [END example_xcomargs] diff --git a/airflow-core/src/airflow/jobs/scheduler_job_runner.py b/airflow-core/src/airflow/jobs/scheduler_job_runner.py index 586a2053b83eb..4185cbf6d4425 100644 --- 
a/airflow-core/src/airflow/jobs/scheduler_job_runner.py +++ b/airflow-core/src/airflow/jobs/scheduler_job_runner.py @@ -32,7 +32,7 @@ from itertools import groupby from typing import TYPE_CHECKING, Any, Callable -from sqlalchemy import and_, delete, exists, func, select, text, tuple_, update +from sqlalchemy import and_, delete, desc, exists, func, select, text, tuple_, update from sqlalchemy.exc import OperationalError from sqlalchemy.orm import joinedload, lazyload, load_only, make_transient, selectinload from sqlalchemy.sql import expression @@ -1998,7 +1998,7 @@ def _maybe_requeue_stuck_ti(self, *, ti, session): """ num_times_stuck = self._get_num_times_stuck_in_queued(ti, session) if num_times_stuck < self._num_stuck_queued_retries: - self.log.info("Task stuck in queued; will try to requeue. task_id=%s", ti.task_id) + self.log.info("Task stuck in queued; will try to requeue. task_instance=%s", ti) session.add( Log( event=TASK_STUCK_IN_QUEUED_RESCHEDULE_EVENT, @@ -2043,19 +2043,34 @@ def _get_num_times_stuck_in_queued(self, ti: TaskInstance, session: Session = NE We can then use this information to determine whether to reschedule a task or fail it. """ - return ( - session.query(Log) + last_running_time = session.scalar( + select(Log.dttm) .where( - Log.task_id == ti.task_id, Log.dag_id == ti.dag_id, + Log.task_id == ti.task_id, Log.run_id == ti.run_id, Log.map_index == ti.map_index, Log.try_number == ti.try_number, - Log.event == TASK_STUCK_IN_QUEUED_RESCHEDULE_EVENT, + Log.event == "running", ) - .count() + .order_by(desc(Log.dttm)) + .limit(1) + ) + + query = session.query(Log).where( + Log.task_id == ti.task_id, + Log.dag_id == ti.dag_id, + Log.run_id == ti.run_id, + Log.map_index == ti.map_index, + Log.try_number == ti.try_number, + Log.event == TASK_STUCK_IN_QUEUED_RESCHEDULE_EVENT, ) + if last_running_time: + query = query.where(Log.dttm > last_running_time) + + return query.count() + previous_ti_running_metrics: dict[tuple[str, str, str], int] = {} @provide_session diff --git a/airflow-core/src/airflow/jobs/triggerer_job_runner.py b/airflow-core/src/airflow/jobs/triggerer_job_runner.py index 5df6a03261523..512f8a4f0e6aa 100644 --- a/airflow-core/src/airflow/jobs/triggerer_job_runner.py +++ b/airflow-core/src/airflow/jobs/triggerer_job_runner.py @@ -28,6 +28,7 @@ from collections.abc import Generator, Iterable from contextlib import suppress from datetime import datetime +from socket import socket from traceback import format_exception from typing import TYPE_CHECKING, Annotated, Any, ClassVar, Literal, TypedDict, Union @@ -43,6 +44,7 @@ from airflow.jobs.job import perform_heartbeat from airflow.models.trigger import Trigger from airflow.sdk.execution_time.comms import ( + CommsDecoder, ConnectionResult, DagRunStateResult, DRCount, @@ -58,6 +60,7 @@ TICount, VariableResult, XComResult, + _RequestFrame, ) from airflow.sdk.execution_time.supervisor import WatchedSubprocess, make_buffered_socket_reader from airflow.stats import Stats @@ -70,8 +73,6 @@ from airflow.utils.session import provide_session if TYPE_CHECKING: - from socket import socket - from sqlalchemy.orm import Session from structlog.typing import FilteringBoundLogger, WrappedLogger @@ -181,7 +182,6 @@ class messages: class StartTriggerer(BaseModel): """Tell the async trigger runner process to start, and where to send status update messages.""" - requests_fd: int type: Literal["StartTriggerer"] = "StartTriggerer" class TriggerStateChanges(BaseModel): @@ -295,7 +295,7 @@ class 
TriggerRunnerSupervisor(WatchedSubprocess): """ TriggerRunnerSupervisor is responsible for monitoring the subprocess and marshalling DB access. - This class (which runs in the main process) is responsible for querying the DB, sending RunTrigger + This class (which runs in the main/sync process) is responsible for querying the DB, sending RunTrigger workload messages to the subprocess, and collecting results and updating them in the DB. """ @@ -342,8 +342,8 @@ def start( # type: ignore[override] ): proc = super().start(id=job.id, job=job, target=cls.run_in_process, logger=logger, **kwargs) - msg = messages.StartTriggerer(requests_fd=proc._requests_fd) - proc.send_msg(msg) + msg = messages.StartTriggerer() + proc.send_msg(msg, request_id=0) return proc @functools.cached_property @@ -355,7 +355,7 @@ def client(self) -> Client: client.base_url = "http://in-process.invalid./" # type: ignore[assignment] return client - def _handle_request(self, msg: ToTriggerSupervisor, log: FilteringBoundLogger) -> None: # type: ignore[override] + def _handle_request(self, msg: ToTriggerSupervisor, log: FilteringBoundLogger, req_id: int) -> None: # type: ignore[override] from airflow.sdk.api.datamodels._generated import ( ConnectionResponse, TaskStatesResponse, @@ -396,7 +396,8 @@ def _handle_request(self, msg: ToTriggerSupervisor, log: FilteringBoundLogger) - if isinstance(conn, ConnectionResponse): conn_result = ConnectionResult.from_conn_response(conn) resp = conn_result - dump_opts = {"exclude_unset": True} + # `by_alias=True` is used to convert the `schema` field to `schema_` in the Connection model + dump_opts = {"exclude_unset": True, "by_alias": True} else: resp = conn elif isinstance(msg, GetVariable): @@ -454,8 +455,7 @@ def _handle_request(self, msg: ToTriggerSupervisor, log: FilteringBoundLogger) - else: raise ValueError(f"Unknown message type {type(msg)}") - if resp: - self.send_msg(resp, **dump_opts) + self.send_msg(resp, request_id=req_id, error=None, **dump_opts) def run(self) -> None: """Run synchronously and handle all database reads/writes.""" @@ -628,7 +628,7 @@ def _register_pipe_readers(self, stdout: socket, stderr: socket, requests: socke ), ) - def _process_log_messages_from_subprocess(self) -> Generator[None, bytes, None]: + def _process_log_messages_from_subprocess(self) -> Generator[None, bytes | bytearray, None]: import msgspec from structlog.stdlib import NAME_TO_LEVEL @@ -691,14 +691,60 @@ class TriggerDetails(TypedDict): events: int +@attrs.define(kw_only=True) +class TriggerCommsDecoder(CommsDecoder[ToTriggerRunner, ToTriggerSupervisor]): + _async_writer: asyncio.StreamWriter = attrs.field(alias="async_writer") + _async_reader: asyncio.StreamReader = attrs.field(alias="async_reader") + + body_decoder: TypeAdapter[ToTriggerRunner] = attrs.field( + factory=lambda: TypeAdapter(ToTriggerRunner), repr=False + ) + + _lock: asyncio.Lock = attrs.field(factory=asyncio.Lock, repr=False) + + def _read_frame(self): + from asgiref.sync import async_to_sync + + return async_to_sync(self._aread_frame)() + + def send(self, msg: ToTriggerSupervisor) -> ToTriggerRunner | None: + from asgiref.sync import async_to_sync + + return async_to_sync(self.asend)(msg) + + async def _aread_frame(self): + len_bytes = await self._async_reader.readexactly(4) + len = int.from_bytes(len_bytes, byteorder="big") + if len >= 2**32: + raise OverflowError(f"Refusing to receive messages larger than 4GiB {len=}") + + buffer = await self._async_reader.readexactly(len) + return self.resp_decoder.decode(buffer) + + async 
def _aget_response(self, expect_id: int) -> ToTriggerRunner | None: + frame = await self._aread_frame() + if frame.id != expect_id: + # Given the lock we take out in `asend`, this _shouldn't_ be possible, but I'd rather fail with + # this explicit error return the wrong type of message back to a Trigger + raise RuntimeError(f"Response read out of order! Got {frame.id=}, {expect_id=}") + return self._from_frame(frame) + + async def asend(self, msg: ToTriggerSupervisor) -> ToTriggerRunner | None: + frame = _RequestFrame(id=next(self.id_counter), body=msg.model_dump()) + bytes = frame.as_bytes() + + async with self._lock: + self._async_writer.write(bytes) + + return await self._aget_response(frame.id) + + class TriggerRunner: """ Runtime environment for all triggers. - Mainly runs inside its own thread, where it hands control off to an asyncio - event loop, but is also sometimes interacted with from the main thread - (where all the DB queries are done). All communication between threads is - done via Deques. + Mainly runs inside its own process, where it hands control off to an asyncio + event loop. All communication between this and it's (sync) supervisor is done via sockets """ # Maps trigger IDs to their running tasks and other info @@ -726,10 +772,7 @@ class TriggerRunner: # TODO: connect this to the parent process log: FilteringBoundLogger = structlog.get_logger() - requests_sock: asyncio.StreamWriter - response_sock: asyncio.StreamReader - - decoder: TypeAdapter[ToTriggerRunner] + comms_decoder: TriggerCommsDecoder def __init__(self): super().__init__() @@ -740,7 +783,6 @@ def __init__(self): self.events = deque() self.failed_triggers = deque() self.job_id = None - self.decoder = TypeAdapter(ToTriggerRunner) def run(self): """Sync entrypoint - just run a run in an async loop.""" @@ -796,36 +838,21 @@ async def init_comms(self): """ from airflow.sdk.execution_time import task_runner - loop = asyncio.get_event_loop() + # Yes, we read and write to stdin! It's a socket, not a normal stdin. 
+ reader, writer = await asyncio.open_connection(sock=socket(fileno=0)) - comms_decoder = task_runner.CommsDecoder[ToTriggerRunner, ToTriggerSupervisor]( - input=sys.stdin, - decoder=self.decoder, + self.comms_decoder = TriggerCommsDecoder( + async_writer=writer, + async_reader=reader, ) - task_runner.SUPERVISOR_COMMS = comms_decoder - - async def connect_stdin() -> asyncio.StreamReader: - reader = asyncio.StreamReader() - protocol = asyncio.StreamReaderProtocol(reader) - await loop.connect_read_pipe(lambda: protocol, sys.stdin) - return reader - - self.response_sock = await connect_stdin() + task_runner.SUPERVISOR_COMMS = self.comms_decoder - line = await self.response_sock.readline() + msg = await self.comms_decoder._aget_response(expect_id=0) - msg = self.decoder.validate_json(line) if not isinstance(msg, messages.StartTriggerer): raise RuntimeError(f"Required first message to be a messages.StartTriggerer, it was {msg}") - comms_decoder.request_socket = os.fdopen(msg.requests_fd, "wb", buffering=0) - writer_transport, writer_protocol = await loop.connect_write_pipe( - lambda: asyncio.streams.FlowControlMixin(loop=loop), - comms_decoder.request_socket, - ) - self.requests_sock = asyncio.streams.StreamWriter(writer_transport, writer_protocol, None, loop) - async def create_triggers(self): """Drain the to_create queue and create all new triggers that have been requested in the DB.""" while self.to_create: @@ -848,8 +875,16 @@ async def create_triggers(self): await asyncio.sleep(0) try: - kwargs = Trigger._decrypt_kwargs(workload.encrypted_kwargs) - trigger_instance = trigger_class(**kwargs) + from airflow.serialization.serialized_objects import smart_decode_trigger_kwargs + + # Decrypt and clean trigger kwargs before for execution + # Note: We only clean up serialization artifacts (__var, __type keys) here, + # not in `_decrypt_kwargs` because it is used during hash comparison in + # add_asset_trigger_references and could lead to adverse effects like hash mismatches + # that could cause None values in collections. + kw = Trigger._decrypt_kwargs(workload.encrypted_kwargs) + deserialised_kwargs = {k: smart_decode_trigger_kwargs(v) for k, v in kw.items()} + trigger_instance = trigger_class(**deserialised_kwargs) except TypeError as err: self.log.error("Trigger failed to inflate", error=err) self.failed_triggers.append((trigger_id, err)) @@ -934,8 +969,6 @@ async def cleanup_finished_triggers(self) -> list[int]: return finished_ids async def sync_state_to_supervisor(self, finished_ids: list[int]): - from airflow.sdk.execution_time.task_runner import SUPERVISOR_COMMS - # Copy out of our deques in threadsafe manner to sync state with parent events_to_send = [] while self.events: @@ -961,19 +994,17 @@ async def sync_state_to_supervisor(self, finished_ids: list[int]): if not finished_ids: msg.finished = None - # Block triggers from making any requests for the duration of this - async with SUPERVISOR_COMMS.lock: - # Tell the monitor that we've finished triggers so it can update things - self.requests_sock.write(msg.model_dump_json(exclude_none=True).encode() + b"\n") - line = await self.response_sock.readline() - - if line == b"": # EoF received! 
+ # Tell the monitor that we've finished triggers so it can update things + try: + resp = await self.comms_decoder.asend(msg) + except asyncio.IncompleteReadError: if task := asyncio.current_task(): task.cancel("EOF - shutting down") + return + raise - resp = self.decoder.validate_json(line) if not isinstance(resp, messages.TriggerStateSync): - raise RuntimeError(f"Expected to get a TriggerStateSync message, instead we got f{type(msg)}") + raise RuntimeError(f"Expected to get a TriggerStateSync message, instead we got {type(msg)}") self.to_create.extend(resp.to_create) self.to_cancel.extend(resp.to_cancel) diff --git a/airflow-core/src/airflow/migrations/utils.py b/airflow-core/src/airflow/migrations/utils.py index 9305606873549..2dbbbece01a57 100644 --- a/airflow-core/src/airflow/migrations/utils.py +++ b/airflow-core/src/airflow/migrations/utils.py @@ -16,6 +16,7 @@ # under the License. from __future__ import annotations +import contextlib from collections import defaultdict from contextlib import contextmanager @@ -103,3 +104,11 @@ def mysql_drop_index_if_exists(index_name, table_name, op): SELECT 1; END IF; """) + + +def ignore_sqlite_value_error(): + from alembic import op + + if op.get_bind().dialect.name == "sqlite": + return contextlib.suppress(ValueError) + return contextlib.nullcontext() diff --git a/airflow-core/src/airflow/migrations/versions/0017_2_9_2_fix_inconsistency_between_ORM_and_migration_files.py b/airflow-core/src/airflow/migrations/versions/0017_2_9_2_fix_inconsistency_between_ORM_and_migration_files.py index 0a62b550d40b9..fa24916df6faa 100644 --- a/airflow-core/src/airflow/migrations/versions/0017_2_9_2_fix_inconsistency_between_ORM_and_migration_files.py +++ b/airflow-core/src/airflow/migrations/versions/0017_2_9_2_fix_inconsistency_between_ORM_and_migration_files.py @@ -243,8 +243,12 @@ def upgrade(): ) """) ) - - conn.execute(sa.text("INSERT INTO dag_run_new SELECT * FROM dag_run")) + headers = ( + "id, dag_id, queued_at, execution_date, start_date, end_date, state, run_id, creating_job_id, " + "external_trigger, run_type, conf, data_interval_start, data_interval_end, " + "last_scheduling_decision, dag_hash, log_template_id, updated_at, clear_number" + ) + conn.execute(sa.text(f"INSERT INTO dag_run_new ({headers}) SELECT {headers} FROM dag_run")) conn.execute(sa.text("DROP TABLE dag_run")) conn.execute(sa.text("ALTER TABLE dag_run_new RENAME TO dag_run")) conn.execute(sa.text("PRAGMA foreign_keys=on")) diff --git a/airflow-core/src/airflow/migrations/versions/0047_3_0_0_add_dag_versioning.py b/airflow-core/src/airflow/migrations/versions/0047_3_0_0_add_dag_versioning.py index a4d4238816a2c..2cad82e1e7f63 100644 --- a/airflow-core/src/airflow/migrations/versions/0047_3_0_0_add_dag_versioning.py +++ b/airflow-core/src/airflow/migrations/versions/0047_3_0_0_add_dag_versioning.py @@ -32,6 +32,7 @@ from sqlalchemy_utils import UUIDType from airflow.migrations.db_types import TIMESTAMP, StringID +from airflow.migrations.utils import ignore_sqlite_value_error from airflow.models.base import naming_convention from airflow.utils import timezone @@ -55,16 +56,26 @@ def upgrade(): sa.Column("dag_id", StringID(), nullable=False), sa.Column("created_at", TIMESTAMP(), nullable=False, default=timezone.utcnow), sa.Column( - "last_updated", TIMESTAMP(), nullable=False, default=timezone.utcnow, onupdate=timezone.utcnow + "last_updated", + TIMESTAMP(), + nullable=False, + default=timezone.utcnow, + onupdate=timezone.utcnow, ), sa.ForeignKeyConstraint( - ("dag_id",), 
["dag.dag_id"], name=op.f("dag_version_dag_id_fkey"), ondelete="CASCADE" + ("dag_id",), + ["dag.dag_id"], + name=op.f("dag_version_dag_id_fkey"), + ondelete="CASCADE", ), sa.PrimaryKeyConstraint("id", name=op.f("dag_version_pkey")), sa.UniqueConstraint("dag_id", "version_number", name="dag_id_v_name_v_number_unique_constraint"), ) - with op.batch_alter_table("dag_code") as batch_op: + + with ignore_sqlite_value_error(), op.batch_alter_table("dag_code") as batch_op: batch_op.drop_constraint("dag_code_pkey", type_="primary") + + with op.batch_alter_table("dag_code") as batch_op: batch_op.drop_column("fileloc_hash") batch_op.add_column(sa.Column("id", UUIDType(binary=False), nullable=False)) batch_op.create_primary_key("dag_code_pkey", ["id"]) @@ -81,8 +92,10 @@ def upgrade(): ) batch_op.create_unique_constraint("dag_code_dag_version_id_uq", ["dag_version_id"]) - with op.batch_alter_table("serialized_dag") as batch_op: + with ignore_sqlite_value_error(), op.batch_alter_table("serialized_dag") as batch_op: batch_op.drop_constraint("serialized_dag_pkey", type_="primary") + + with op.batch_alter_table("serialized_dag") as batch_op: batch_op.drop_index("idx_fileloc_hash") batch_op.drop_column("fileloc_hash") batch_op.drop_column("fileloc") diff --git a/airflow-core/src/airflow/migrations/versions/0069_3_0_3_delete_import_errors.py b/airflow-core/src/airflow/migrations/versions/0069_3_0_3_delete_import_errors.py new file mode 100644 index 0000000000000..c0f267b97b9d7 --- /dev/null +++ b/airflow-core/src/airflow/migrations/versions/0069_3_0_3_delete_import_errors.py @@ -0,0 +1,50 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Delete import errors. + +Revision ID: fe199e1abd77 +Revises: 29ce7909c52b +Create Date: 2025-06-10 08:53:28.782896 + +""" + +from __future__ import annotations + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. 
+revision = "fe199e1abd77" +down_revision = "29ce7909c52b" +branch_labels = None +depends_on = None + +airflow_version = "3.0.3" + + +def upgrade(): + """Apply Delete import errors.""" + # delete import_error table rows + op.get_bind().execute(sa.text("DELETE FROM import_error")) + + +def downgrade(): + """Unapply Delete import errors.""" + pass diff --git a/airflow-core/src/airflow/models/dag.py b/airflow-core/src/airflow/models/dag.py index 57c1050e3ce94..77f96d590cc5e 100644 --- a/airflow-core/src/airflow/models/dag.py +++ b/airflow-core/src/airflow/models/dag.py @@ -1794,7 +1794,7 @@ def create_tasks(task): if isinstance(task, TaskGroup): return task_group_map[task.group_id] - new_task = copy.deepcopy(task) + new_task = copy.copy(task) # Only overwrite the specific attributes we want to change new_task.task_id = task.task_id diff --git a/airflow-core/src/airflow/models/mappedoperator.py b/airflow-core/src/airflow/models/mappedoperator.py index 5ecdf14f59051..bc598ef0469a8 100644 --- a/airflow-core/src/airflow/models/mappedoperator.py +++ b/airflow-core/src/airflow/models/mappedoperator.py @@ -68,7 +68,7 @@ def expand_start_from_trigger(self, *, context: Context, session: Session) -> bo task_id=self.task_id, dag_id=self.dag_id, ) - return False + return False # start_from_trigger only makes sense when start_trigger_args exists. if not self.start_trigger_args: return False diff --git a/airflow-core/src/airflow/models/serialized_dag.py b/airflow-core/src/airflow/models/serialized_dag.py index 9e4c6115d92ef..71722b54adee6 100644 --- a/airflow-core/src/airflow/models/serialized_dag.py +++ b/airflow-core/src/airflow/models/serialized_dag.py @@ -416,11 +416,16 @@ def write_dag( serialized_dag_hash = session.scalars( select(cls.dag_hash).where(cls.dag_id == dag.dag_id).order_by(cls.created_at.desc()) ).first() + dag_version = DagVersion.get_latest_version(dag.dag_id, session=session) - if serialized_dag_hash is not None and serialized_dag_hash == new_serialized_dag.dag_hash: + if ( + serialized_dag_hash == new_serialized_dag.dag_hash + and dag_version + and dag_version.bundle_name == bundle_name + ): log.debug("Serialized DAG (%s) is unchanged. Skipping writing to DB", dag.dag_id) return False - dag_version = DagVersion.get_latest_version(dag.dag_id, session=session) + if dag_version and not dag_version.task_instances: # This is for dynamic DAGs that the hashes changes often. 
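
The `copy.deepcopy(task)` to `copy.copy(task)` switch in `dag.py` above relies on a shallow copy sharing the task's attribute objects, so only the attributes explicitly reassigned afterwards differ. A tiny illustration of that difference; the `Task` class here is a stand-in, not Airflow's operator:

# Shallow vs deep copy: copy.copy() shares nested objects, copy.deepcopy()
# duplicates them. The partial-copy change above only reassigns the attributes
# it needs to change (e.g. task_id), so a shallow copy is sufficient and much
# cheaper for large task objects.
import copy

class Task:
    def __init__(self, task_id: str, params: dict):
        self.task_id = task_id
        self.params = params

original = Task("extract", {"retries": 3})

shallow = copy.copy(original)
shallow.task_id = "extract_copy"          # reassigned attribute: independent
assert shallow.params is original.params  # shared object: no duplication cost

deep = copy.deepcopy(original)
assert deep.params is not original.params  # deepcopy duplicates everything
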
We should update # the serialized dag, the dag_version and the dag_code instead of a new version diff --git a/airflow-core/src/airflow/models/taskinstancehistory.py b/airflow-core/src/airflow/models/taskinstancehistory.py index c7c6eb79a6008..079d58efc09d9 100644 --- a/airflow-core/src/airflow/models/taskinstancehistory.py +++ b/airflow-core/src/airflow/models/taskinstancehistory.py @@ -52,6 +52,7 @@ if TYPE_CHECKING: from sqlalchemy.orm.session import Session + from airflow.models import DagRun from airflow.models.taskinstance import TaskInstance @@ -113,6 +114,13 @@ class TaskInstanceHistory(Base): foreign_keys=[dag_version_id], ) + dag_run = relationship( + "DagRun", + primaryjoin="TaskInstanceHistory.run_id == DagRun.run_id", + viewonly=True, + foreign_keys=[run_id], + ) + def __init__( self, ti: TaskInstance, @@ -154,6 +162,11 @@ def __init__( Index("idx_tih_dag_run", dag_id, run_id), ) + @property + def id(self) -> str: + """Alias for primary key field to support TaskInstance.""" + return self.task_instance_id + @staticmethod @provide_session def record_ti(ti: TaskInstance, session: Session = NEW_SESSION) -> None: @@ -176,3 +189,8 @@ def record_ti(ti: TaskInstance, session: Session = NEW_SESSION) -> None: ti.set_duration() ti_history = TaskInstanceHistory(ti, state=ti_history_state) session.add(ti_history) + + @provide_session + def get_dagrun(self, session: Session = NEW_SESSION) -> DagRun: + """Return the DagRun for this TaskInstanceHistory, matching TaskInstance.""" + return self.dag_run diff --git a/airflow-core/src/airflow/serialization/serialized_objects.py b/airflow-core/src/airflow/serialization/serialized_objects.py index 2a8f3cd9da671..e3cf0f019d5d1 100644 --- a/airflow-core/src/airflow/serialization/serialized_objects.py +++ b/airflow-core/src/airflow/serialization/serialized_objects.py @@ -337,19 +337,20 @@ def decode_asset_condition(var: dict[str, Any]) -> BaseAsset: raise ValueError(f"deserialization not implemented for DAT {dat!r}") -def decode_asset(var: dict[str, Any]): - def _smart_decode_trigger_kwargs(d): - """ - Slightly clean up kwargs for display. +def smart_decode_trigger_kwargs(d): + """ + Slightly clean up kwargs for display or execution. - This detects one level of BaseSerialization and tries to deserialize the - content, removing some __type __var ugliness when the value is displayed - in UI to the user. - """ - if not isinstance(d, dict) or Encoding.TYPE not in d: - return d - return BaseSerialization.deserialize(d) + This detects one level of BaseSerialization and tries to deserialize the + content, removing some __type __var ugliness when the value is displayed + in UI to the user and/or while execution. 
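
Concretely, the function strips one level of the serializer's "__type"/"__var" wrapping; a hedged sketch of what that looks like on a kwarg value. The encoded dict below is an illustrative assumption of the wrapper shape, and the real code delegates to BaseSerialization.deserialize rather than unwrapping by hand.

# Illustration of the one-level unwrap performed by smart_decode_trigger_kwargs.
# The nested value is a made-up example; the real implementation calls
# BaseSerialization.deserialize.
def unwrap_one_level(value, deserialize):
    if not isinstance(value, dict) or "__type" not in value:
        return value           # plain kwarg, leave untouched
    return deserialize(value)  # one level of serializer wrapping: decode it

encoded = {"__type": "dict", "__var": {"path": "s3://bucket/key"}}
plain = {"poke_interval": 30}

assert unwrap_one_level(plain, lambda v: v) == plain
assert unwrap_one_level(encoded, lambda v: v["__var"]) == {"path": "s3://bucket/key"}
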
+ """ + if not isinstance(d, dict) or Encoding.TYPE not in d: + return d + return BaseSerialization.deserialize(d) + +def decode_asset(var: dict[str, Any]): watchers = var.get("watchers", []) return Asset( name=var["name"], @@ -361,7 +362,7 @@ def _smart_decode_trigger_kwargs(d): name=watcher["name"], trigger={ "classpath": watcher["trigger"]["classpath"], - "kwargs": _smart_decode_trigger_kwargs(watcher["trigger"]["kwargs"]), + "kwargs": smart_decode_trigger_kwargs(watcher["trigger"]["kwargs"]), }, ) for watcher in watchers @@ -2177,16 +2178,14 @@ def get_task_assets( if isinstance(obj, of_type): yield task["task_id"], obj - def get_run_data_interval(self, run: DagRun) -> DataInterval: + def get_run_data_interval(self, run: DagRun) -> DataInterval | None: """Get the data interval of this run.""" if run.dag_id is not None and run.dag_id != self.dag_id: raise ValueError(f"Arguments refer to different DAGs: {self.dag_id} != {run.dag_id}") data_interval = _get_model_data_interval(run, "data_interval_start", "data_interval_end") - # the older implementation has call to infer_automated_data_interval if data_interval is None, do we want to keep that or raise - # an exception? - if data_interval is None: - raise ValueError(f"Cannot calculate data interval for run {run}") + if data_interval is None and run.logical_date is not None: + data_interval = self._real_dag.timetable.infer_manual_data_interval(run_after=run.logical_date) return data_interval diff --git a/airflow-core/src/airflow/settings.py b/airflow-core/src/airflow/settings.py index 0056b93497a84..413f59aa191da 100644 --- a/airflow-core/src/airflow/settings.py +++ b/airflow-core/src/airflow/settings.py @@ -613,7 +613,9 @@ def initialize(): # The webservers import this file from models.py with the default settings. 
if not os.environ.get("PYTHON_OPERATORS_VIRTUAL_ENV_MODE", None): - configure_orm() + is_worker = os.environ.get("_AIRFLOW__REEXECUTED_PROCESS") == "1" + if not is_worker: + configure_orm() configure_action_logging() # mask the sensitive_config_values diff --git a/airflow-core/src/airflow/timetables/events.py b/airflow-core/src/airflow/timetables/events.py index 42b5d13e2ec78..ac2c0b6131982 100644 --- a/airflow-core/src/airflow/timetables/events.py +++ b/airflow-core/src/airflow/timetables/events.py @@ -62,7 +62,8 @@ def __init__( if description is None: if self.event_dates: self.description = ( - f"{len(self.event_dates)} events between {self.event_dates[0]} and {self.event_dates[-1]}" + f"{len(self.event_dates)} events between " + f"{self.event_dates[0].isoformat(sep='T')} and {self.event_dates[-1].isoformat(sep='T')}" ) else: self.description = "No events" @@ -123,12 +124,17 @@ def serialize(self): return { "event_dates": [x.isoformat(sep="T") for x in self.event_dates], "restrict_to_events": self.restrict_to_events, + "description": self.description, + "_summary": self._summary, } @classmethod def deserialize(cls, data) -> Timetable: - return cls( - [pendulum.DateTime.fromisoformat(x) for x in data["event_dates"]], - data["restrict_to_events"], + time_table = cls( + event_dates=[pendulum.DateTime.fromisoformat(x) for x in data["event_dates"]], + restrict_to_events=data["restrict_to_events"], presorted=True, + description=data["description"], ) + time_table._summary = data["_summary"] + return time_table diff --git a/airflow-core/src/airflow/traces/otel_tracer.py b/airflow-core/src/airflow/traces/otel_tracer.py index d5e71e3f47e05..34ee543a56d6a 100644 --- a/airflow-core/src/airflow/traces/otel_tracer.py +++ b/airflow-core/src/airflow/traces/otel_tracer.py @@ -19,6 +19,7 @@ import logging import random +from contextlib import AbstractContextManager from typing import TYPE_CHECKING from opentelemetry import trace @@ -247,7 +248,7 @@ def _new_span( links=None, start_time=None, start_as_current: bool = True, - ): + ) -> AbstractContextManager[trace.span.Span] | trace.span.Span: if component is None: component = self.otel_service @@ -260,24 +261,24 @@ def _new_span( links = [] if start_as_current: - span = tracer.start_as_current_span( - name=span_name, - context=parent_context, - links=links, - start_time=datetime_to_nano(start_time), - ) - else: - span = tracer.start_span( + return tracer.start_as_current_span( name=span_name, context=parent_context, links=links, start_time=datetime_to_nano(start_time), ) - current_span_ctx = trace.set_span_in_context(NonRecordingSpan(span.get_span_context())) - # We have to manually make the span context as the active context. - # If the span needs to be injected into the carrier, then this is needed to make sure - # that the injected context will point to the span context that was just created. - attach(current_span_ctx) + + span = tracer.start_span( + name=span_name, + context=parent_context, + links=links, + start_time=datetime_to_nano(start_time), + ) + current_span_ctx = trace.set_span_in_context(NonRecordingSpan(span.get_span_context())) + # We have to manually make the span context as the active context. + # If the span needs to be injected into the carrier, then this is needed to make sure + # that the injected context will point to the span context that was just created. 
+ attach(current_span_ctx) return span def inject(self) -> dict: diff --git a/airflow-core/src/airflow/ui/openapi-gen/queries/common.ts b/airflow-core/src/airflow/ui/openapi-gen/queries/common.ts index a7efa4868c859..fb8e969aad598 100644 --- a/airflow-core/src/airflow/ui/openapi-gen/queries/common.ts +++ b/airflow-core/src/airflow/ui/openapi-gen/queries/common.ts @@ -1806,6 +1806,94 @@ export const UseGridServiceGridDataKeyFn = ( }, ]), ]; +export type GridServiceGetDagStructureDefaultResponse = Awaited< + ReturnType +>; +export type GridServiceGetDagStructureQueryResult< + TData = GridServiceGetDagStructureDefaultResponse, + TError = unknown, +> = UseQueryResult; +export const useGridServiceGetDagStructureKey = "GridServiceGetDagStructure"; +export const UseGridServiceGetDagStructureKeyFn = ( + { + dagId, + limit, + offset, + orderBy, + runAfterGte, + runAfterLte, + }: { + dagId: string; + limit?: number; + offset?: number; + orderBy?: string; + runAfterGte?: string; + runAfterLte?: string; + }, + queryKey?: Array, +) => [ + useGridServiceGetDagStructureKey, + ...(queryKey ?? [{ dagId, limit, offset, orderBy, runAfterGte, runAfterLte }]), +]; +export type GridServiceGetGridRunsDefaultResponse = Awaited>; +export type GridServiceGetGridRunsQueryResult< + TData = GridServiceGetGridRunsDefaultResponse, + TError = unknown, +> = UseQueryResult; +export const useGridServiceGetGridRunsKey = "GridServiceGetGridRuns"; +export const UseGridServiceGetGridRunsKeyFn = ( + { + dagId, + limit, + offset, + orderBy, + runAfterGte, + runAfterLte, + }: { + dagId: string; + limit?: number; + offset?: number; + orderBy?: string; + runAfterGte?: string; + runAfterLte?: string; + }, + queryKey?: Array, +) => [ + useGridServiceGetGridRunsKey, + ...(queryKey ?? [{ dagId, limit, offset, orderBy, runAfterGte, runAfterLte }]), +]; +export type GridServiceGetGridTiSummariesDefaultResponse = Awaited< + ReturnType +>; +export type GridServiceGetGridTiSummariesQueryResult< + TData = GridServiceGetGridTiSummariesDefaultResponse, + TError = unknown, +> = UseQueryResult; +export const useGridServiceGetGridTiSummariesKey = "GridServiceGetGridTiSummaries"; +export const UseGridServiceGetGridTiSummariesKeyFn = ( + { + dagId, + runId, + }: { + dagId: string; + runId: string; + }, + queryKey?: Array, +) => [useGridServiceGetGridTiSummariesKey, ...(queryKey ?? [{ dagId, runId }])]; +export type GridServiceGetLatestRunDefaultResponse = Awaited>; +export type GridServiceGetLatestRunQueryResult< + TData = GridServiceGetLatestRunDefaultResponse, + TError = unknown, +> = UseQueryResult; +export const useGridServiceGetLatestRunKey = "GridServiceGetLatestRun"; +export const UseGridServiceGetLatestRunKeyFn = ( + { + dagId, + }: { + dagId: string; + }, + queryKey?: Array, +) => [useGridServiceGetLatestRunKey, ...(queryKey ?? [{ dagId }])]; export type AssetServiceCreateAssetEventMutationResult = Awaited< ReturnType >; diff --git a/airflow-core/src/airflow/ui/openapi-gen/queries/ensureQueryData.ts b/airflow-core/src/airflow/ui/openapi-gen/queries/ensureQueryData.ts index f02690c160ba0..ac664436994a6 100644 --- a/airflow-core/src/airflow/ui/openapi-gen/queries/ensureQueryData.ts +++ b/airflow-core/src/airflow/ui/openapi-gen/queries/ensureQueryData.ts @@ -2527,3 +2527,143 @@ export const ensureUseGridServiceGridDataData = ( state, }), }); +/** + * Get Dag Structure + * Return dag structure for grid view. + * @param data The data for the request. 
+ * @param data.dagId + * @param data.offset + * @param data.limit + * @param data.orderBy + * @param data.runAfterGte + * @param data.runAfterLte + * @returns GridNodeResponse Successful Response + * @throws ApiError + */ +export const ensureUseGridServiceGetDagStructureData = ( + queryClient: QueryClient, + { + dagId, + limit, + offset, + orderBy, + runAfterGte, + runAfterLte, + }: { + dagId: string; + limit?: number; + offset?: number; + orderBy?: string; + runAfterGte?: string; + runAfterLte?: string; + }, +) => + queryClient.ensureQueryData({ + queryKey: Common.UseGridServiceGetDagStructureKeyFn({ + dagId, + limit, + offset, + orderBy, + runAfterGte, + runAfterLte, + }), + queryFn: () => GridService.getDagStructure({ dagId, limit, offset, orderBy, runAfterGte, runAfterLte }), + }); +/** + * Get Grid Runs + * Get info about a run for the grid. + * @param data The data for the request. + * @param data.dagId + * @param data.offset + * @param data.limit + * @param data.orderBy + * @param data.runAfterGte + * @param data.runAfterLte + * @returns GridRunsResponse Successful Response + * @throws ApiError + */ +export const ensureUseGridServiceGetGridRunsData = ( + queryClient: QueryClient, + { + dagId, + limit, + offset, + orderBy, + runAfterGte, + runAfterLte, + }: { + dagId: string; + limit?: number; + offset?: number; + orderBy?: string; + runAfterGte?: string; + runAfterLte?: string; + }, +) => + queryClient.ensureQueryData({ + queryKey: Common.UseGridServiceGetGridRunsKeyFn({ + dagId, + limit, + offset, + orderBy, + runAfterGte, + runAfterLte, + }), + queryFn: () => GridService.getGridRuns({ dagId, limit, offset, orderBy, runAfterGte, runAfterLte }), + }); +/** + * Get Grid Ti Summaries + * Get states for TIs / "groups" of TIs. + * + * Essentially this is to know what color to put in the squares in the grid. + * + * The tricky part here is that we aggregate the state for groups and mapped tasks. + * + * We don't add all the TIs for mapped TIs -- we only add one entry for the mapped task and + * its state is an aggregate of its TI states. + * + * And for task groups, we add a "task" for that which is not really a task but is just + * an entry that represents the group (so that we can show a filled in box when the group + * is not expanded) and its state is an agg of those within it. + * @param data The data for the request. + * @param data.dagId + * @param data.runId + * @returns GridTISummaries Successful Response + * @throws ApiError + */ +export const ensureUseGridServiceGetGridTiSummariesData = ( + queryClient: QueryClient, + { + dagId, + runId, + }: { + dagId: string; + runId: string; + }, +) => + queryClient.ensureQueryData({ + queryKey: Common.UseGridServiceGetGridTiSummariesKeyFn({ dagId, runId }), + queryFn: () => GridService.getGridTiSummaries({ dagId, runId }), + }); +/** + * Get Latest Run + * Get information about the latest dag run by run_after. + * + * This is used by the UI to figure out if it needs to rerun queries and resume auto refresh. + * @param data The data for the request. 
+ * @param data.dagId + * @returns unknown Successful Response + * @throws ApiError + */ +export const ensureUseGridServiceGetLatestRunData = ( + queryClient: QueryClient, + { + dagId, + }: { + dagId: string; + }, +) => + queryClient.ensureQueryData({ + queryKey: Common.UseGridServiceGetLatestRunKeyFn({ dagId }), + queryFn: () => GridService.getLatestRun({ dagId }), + }); diff --git a/airflow-core/src/airflow/ui/openapi-gen/queries/prefetch.ts b/airflow-core/src/airflow/ui/openapi-gen/queries/prefetch.ts index b9039a10f3719..dfb13f3e0a145 100644 --- a/airflow-core/src/airflow/ui/openapi-gen/queries/prefetch.ts +++ b/airflow-core/src/airflow/ui/openapi-gen/queries/prefetch.ts @@ -2527,3 +2527,143 @@ export const prefetchUseGridServiceGridData = ( state, }), }); +/** + * Get Dag Structure + * Return dag structure for grid view. + * @param data The data for the request. + * @param data.dagId + * @param data.offset + * @param data.limit + * @param data.orderBy + * @param data.runAfterGte + * @param data.runAfterLte + * @returns GridNodeResponse Successful Response + * @throws ApiError + */ +export const prefetchUseGridServiceGetDagStructure = ( + queryClient: QueryClient, + { + dagId, + limit, + offset, + orderBy, + runAfterGte, + runAfterLte, + }: { + dagId: string; + limit?: number; + offset?: number; + orderBy?: string; + runAfterGte?: string; + runAfterLte?: string; + }, +) => + queryClient.prefetchQuery({ + queryKey: Common.UseGridServiceGetDagStructureKeyFn({ + dagId, + limit, + offset, + orderBy, + runAfterGte, + runAfterLte, + }), + queryFn: () => GridService.getDagStructure({ dagId, limit, offset, orderBy, runAfterGte, runAfterLte }), + }); +/** + * Get Grid Runs + * Get info about a run for the grid. + * @param data The data for the request. + * @param data.dagId + * @param data.offset + * @param data.limit + * @param data.orderBy + * @param data.runAfterGte + * @param data.runAfterLte + * @returns GridRunsResponse Successful Response + * @throws ApiError + */ +export const prefetchUseGridServiceGetGridRuns = ( + queryClient: QueryClient, + { + dagId, + limit, + offset, + orderBy, + runAfterGte, + runAfterLte, + }: { + dagId: string; + limit?: number; + offset?: number; + orderBy?: string; + runAfterGte?: string; + runAfterLte?: string; + }, +) => + queryClient.prefetchQuery({ + queryKey: Common.UseGridServiceGetGridRunsKeyFn({ + dagId, + limit, + offset, + orderBy, + runAfterGte, + runAfterLte, + }), + queryFn: () => GridService.getGridRuns({ dagId, limit, offset, orderBy, runAfterGte, runAfterLte }), + }); +/** + * Get Grid Ti Summaries + * Get states for TIs / "groups" of TIs. + * + * Essentially this is to know what color to put in the squares in the grid. + * + * The tricky part here is that we aggregate the state for groups and mapped tasks. + * + * We don't add all the TIs for mapped TIs -- we only add one entry for the mapped task and + * its state is an aggregate of its TI states. + * + * And for task groups, we add a "task" for that which is not really a task but is just + * an entry that represents the group (so that we can show a filled in box when the group + * is not expanded) and its state is an agg of those within it. + * @param data The data for the request. 
+ * @param data.dagId + * @param data.runId + * @returns GridTISummaries Successful Response + * @throws ApiError + */ +export const prefetchUseGridServiceGetGridTiSummaries = ( + queryClient: QueryClient, + { + dagId, + runId, + }: { + dagId: string; + runId: string; + }, +) => + queryClient.prefetchQuery({ + queryKey: Common.UseGridServiceGetGridTiSummariesKeyFn({ dagId, runId }), + queryFn: () => GridService.getGridTiSummaries({ dagId, runId }), + }); +/** + * Get Latest Run + * Get information about the latest dag run by run_after. + * + * This is used by the UI to figure out if it needs to rerun queries and resume auto refresh. + * @param data The data for the request. + * @param data.dagId + * @returns unknown Successful Response + * @throws ApiError + */ +export const prefetchUseGridServiceGetLatestRun = ( + queryClient: QueryClient, + { + dagId, + }: { + dagId: string; + }, +) => + queryClient.prefetchQuery({ + queryKey: Common.UseGridServiceGetLatestRunKeyFn({ dagId }), + queryFn: () => GridService.getLatestRun({ dagId }), + }); diff --git a/airflow-core/src/airflow/ui/openapi-gen/queries/queries.ts b/airflow-core/src/airflow/ui/openapi-gen/queries/queries.ts index 30b49d52aa330..43f56ec550da5 100644 --- a/airflow-core/src/airflow/ui/openapi-gen/queries/queries.ts +++ b/airflow-core/src/airflow/ui/openapi-gen/queries/queries.ts @@ -3018,6 +3018,164 @@ export const useGridServiceGridData = < }) as TData, ...options, }); +/** + * Get Dag Structure + * Return dag structure for grid view. + * @param data The data for the request. + * @param data.dagId + * @param data.offset + * @param data.limit + * @param data.orderBy + * @param data.runAfterGte + * @param data.runAfterLte + * @returns GridNodeResponse Successful Response + * @throws ApiError + */ +export const useGridServiceGetDagStructure = < + TData = Common.GridServiceGetDagStructureDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + { + dagId, + limit, + offset, + orderBy, + runAfterGte, + runAfterLte, + }: { + dagId: string; + limit?: number; + offset?: number; + orderBy?: string; + runAfterGte?: string; + runAfterLte?: string; + }, + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useQuery({ + queryKey: Common.UseGridServiceGetDagStructureKeyFn( + { dagId, limit, offset, orderBy, runAfterGte, runAfterLte }, + queryKey, + ), + queryFn: () => + GridService.getDagStructure({ dagId, limit, offset, orderBy, runAfterGte, runAfterLte }) as TData, + ...options, + }); +/** + * Get Grid Runs + * Get info about a run for the grid. + * @param data The data for the request. 
+ * @param data.dagId + * @param data.offset + * @param data.limit + * @param data.orderBy + * @param data.runAfterGte + * @param data.runAfterLte + * @returns GridRunsResponse Successful Response + * @throws ApiError + */ +export const useGridServiceGetGridRuns = < + TData = Common.GridServiceGetGridRunsDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + { + dagId, + limit, + offset, + orderBy, + runAfterGte, + runAfterLte, + }: { + dagId: string; + limit?: number; + offset?: number; + orderBy?: string; + runAfterGte?: string; + runAfterLte?: string; + }, + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useQuery({ + queryKey: Common.UseGridServiceGetGridRunsKeyFn( + { dagId, limit, offset, orderBy, runAfterGte, runAfterLte }, + queryKey, + ), + queryFn: () => + GridService.getGridRuns({ dagId, limit, offset, orderBy, runAfterGte, runAfterLte }) as TData, + ...options, + }); +/** + * Get Grid Ti Summaries + * Get states for TIs / "groups" of TIs. + * + * Essentially this is to know what color to put in the squares in the grid. + * + * The tricky part here is that we aggregate the state for groups and mapped tasks. + * + * We don't add all the TIs for mapped TIs -- we only add one entry for the mapped task and + * its state is an aggregate of its TI states. + * + * And for task groups, we add a "task" for that which is not really a task but is just + * an entry that represents the group (so that we can show a filled in box when the group + * is not expanded) and its state is an agg of those within it. + * @param data The data for the request. + * @param data.dagId + * @param data.runId + * @returns GridTISummaries Successful Response + * @throws ApiError + */ +export const useGridServiceGetGridTiSummaries = < + TData = Common.GridServiceGetGridTiSummariesDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + { + dagId, + runId, + }: { + dagId: string; + runId: string; + }, + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useQuery({ + queryKey: Common.UseGridServiceGetGridTiSummariesKeyFn({ dagId, runId }, queryKey), + queryFn: () => GridService.getGridTiSummaries({ dagId, runId }) as TData, + ...options, + }); +/** + * Get Latest Run + * Get information about the latest dag run by run_after. + * + * This is used by the UI to figure out if it needs to rerun queries and resume auto refresh. + * @param data The data for the request. + * @param data.dagId + * @returns unknown Successful Response + * @throws ApiError + */ +export const useGridServiceGetLatestRun = < + TData = Common.GridServiceGetLatestRunDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + { + dagId, + }: { + dagId: string; + }, + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useQuery({ + queryKey: Common.UseGridServiceGetLatestRunKeyFn({ dagId }, queryKey), + queryFn: () => GridService.getLatestRun({ dagId }) as TData, + ...options, + }); /** * Create Asset Event * Create asset events. diff --git a/airflow-core/src/airflow/ui/openapi-gen/queries/suspense.ts b/airflow-core/src/airflow/ui/openapi-gen/queries/suspense.ts index d525b0a662c39..3dfcb2c175505 100644 --- a/airflow-core/src/airflow/ui/openapi-gen/queries/suspense.ts +++ b/airflow-core/src/airflow/ui/openapi-gen/queries/suspense.ts @@ -2995,3 +2995,161 @@ export const useGridServiceGridDataSuspense = < }) as TData, ...options, }); +/** + * Get Dag Structure + * Return dag structure for grid view. 
+ * @param data The data for the request. + * @param data.dagId + * @param data.offset + * @param data.limit + * @param data.orderBy + * @param data.runAfterGte + * @param data.runAfterLte + * @returns GridNodeResponse Successful Response + * @throws ApiError + */ +export const useGridServiceGetDagStructureSuspense = < + TData = Common.GridServiceGetDagStructureDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + { + dagId, + limit, + offset, + orderBy, + runAfterGte, + runAfterLte, + }: { + dagId: string; + limit?: number; + offset?: number; + orderBy?: string; + runAfterGte?: string; + runAfterLte?: string; + }, + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useSuspenseQuery({ + queryKey: Common.UseGridServiceGetDagStructureKeyFn( + { dagId, limit, offset, orderBy, runAfterGte, runAfterLte }, + queryKey, + ), + queryFn: () => + GridService.getDagStructure({ dagId, limit, offset, orderBy, runAfterGte, runAfterLte }) as TData, + ...options, + }); +/** + * Get Grid Runs + * Get info about a run for the grid. + * @param data The data for the request. + * @param data.dagId + * @param data.offset + * @param data.limit + * @param data.orderBy + * @param data.runAfterGte + * @param data.runAfterLte + * @returns GridRunsResponse Successful Response + * @throws ApiError + */ +export const useGridServiceGetGridRunsSuspense = < + TData = Common.GridServiceGetGridRunsDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + { + dagId, + limit, + offset, + orderBy, + runAfterGte, + runAfterLte, + }: { + dagId: string; + limit?: number; + offset?: number; + orderBy?: string; + runAfterGte?: string; + runAfterLte?: string; + }, + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useSuspenseQuery({ + queryKey: Common.UseGridServiceGetGridRunsKeyFn( + { dagId, limit, offset, orderBy, runAfterGte, runAfterLte }, + queryKey, + ), + queryFn: () => + GridService.getGridRuns({ dagId, limit, offset, orderBy, runAfterGte, runAfterLte }) as TData, + ...options, + }); +/** + * Get Grid Ti Summaries + * Get states for TIs / "groups" of TIs. + * + * Essentially this is to know what color to put in the squares in the grid. + * + * The tricky part here is that we aggregate the state for groups and mapped tasks. + * + * We don't add all the TIs for mapped TIs -- we only add one entry for the mapped task and + * its state is an aggregate of its TI states. + * + * And for task groups, we add a "task" for that which is not really a task but is just + * an entry that represents the group (so that we can show a filled in box when the group + * is not expanded) and its state is an agg of those within it. + * @param data The data for the request. + * @param data.dagId + * @param data.runId + * @returns GridTISummaries Successful Response + * @throws ApiError + */ +export const useGridServiceGetGridTiSummariesSuspense = < + TData = Common.GridServiceGetGridTiSummariesDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + { + dagId, + runId, + }: { + dagId: string; + runId: string; + }, + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useSuspenseQuery({ + queryKey: Common.UseGridServiceGetGridTiSummariesKeyFn({ dagId, runId }, queryKey), + queryFn: () => GridService.getGridTiSummaries({ dagId, runId }) as TData, + ...options, + }); +/** + * Get Latest Run + * Get information about the latest dag run by run_after. 
+ * + * This is used by the UI to figure out if it needs to rerun queries and resume auto refresh. + * @param data The data for the request. + * @param data.dagId + * @returns unknown Successful Response + * @throws ApiError + */ +export const useGridServiceGetLatestRunSuspense = < + TData = Common.GridServiceGetLatestRunDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + { + dagId, + }: { + dagId: string; + }, + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useSuspenseQuery({ + queryKey: Common.UseGridServiceGetLatestRunKeyFn({ dagId }, queryKey), + queryFn: () => GridService.getLatestRun({ dagId }) as TData, + ...options, + }); diff --git a/airflow-core/src/airflow/ui/openapi-gen/queries/useDagsInfinite.ts b/airflow-core/src/airflow/ui/openapi-gen/queries/useDagsInfinite.ts new file mode 100644 index 0000000000000..2776977ce46d3 --- /dev/null +++ b/airflow-core/src/airflow/ui/openapi-gen/queries/useDagsInfinite.ts @@ -0,0 +1,58 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import { InfiniteData, useInfiniteQuery, UseInfiniteQueryOptions } from "@tanstack/react-query"; + +import { DagService } from "openapi/requests/services.gen"; +import { DAGTagCollectionResponse } from "openapi/requests/types.gen"; + +import * as Common from "./common"; + +export const useDagTagsInfinite = = unknown[]>( + { + limit, + orderBy, + tagNamePattern, + }: { + limit?: number; + orderBy?: string; + tagNamePattern?: string; + } = {}, + queryKey?: TQueryKey, + options?: Omit< + UseInfiniteQueryOptions< + DAGTagCollectionResponse, + TError, + InfiniteData, + DAGTagCollectionResponse, + unknown[], + number + >, + "queryKey" | "queryFn" + >, +) => + useInfiniteQuery({ + queryKey: Common.UseDagServiceGetDagTagsKeyFn({ limit, orderBy, tagNamePattern }, queryKey), + queryFn: ({ pageParam }) => DagService.getDagTags({ limit, offset: pageParam, orderBy, tagNamePattern }), + initialPageParam: 0, + getNextPageParam: (lastPage, _allPages, lastPageParam, _allPageParams) => + lastPageParam < lastPage.total_entries ? lastPage.tags.length + lastPageParam : undefined, + getPreviousPageParam: (firstPage, _allPages, firstPageParam, _allPageParams) => + firstPageParam > 0 ? 
-firstPage.tags.length + firstPageParam : undefined, + ...options, + }); diff --git a/airflow-core/src/airflow/ui/openapi-gen/requests/schemas.gen.ts b/airflow-core/src/airflow/ui/openapi-gen/requests/schemas.gen.ts index 9562def60ea22..ec6c47e14f91d 100644 --- a/airflow-core/src/airflow/ui/openapi-gen/requests/schemas.gen.ts +++ b/airflow-core/src/airflow/ui/openapi-gen/requests/schemas.gen.ts @@ -6546,6 +6546,60 @@ export const $GridDAGRunwithTIs = { description: "DAG Run model for the Grid UI.", } as const; +export const $GridNodeResponse = { + properties: { + id: { + type: "string", + title: "Id", + }, + label: { + type: "string", + title: "Label", + }, + children: { + anyOf: [ + { + items: { + $ref: "#/components/schemas/GridNodeResponse", + }, + type: "array", + }, + { + type: "null", + }, + ], + title: "Children", + }, + is_mapped: { + anyOf: [ + { + type: "boolean", + }, + { + type: "null", + }, + ], + title: "Is Mapped", + }, + setup_teardown_type: { + anyOf: [ + { + type: "string", + enum: ["setup", "teardown"], + }, + { + type: "null", + }, + ], + title: "Setup Teardown Type", + }, + }, + type: "object", + required: ["id", "label", "is_mapped"], + title: "GridNodeResponse", + description: "Base Node serializer for responses.", +} as const; + export const $GridResponse = { properties: { dag_runs: { @@ -6555,16 +6609,130 @@ export const $GridResponse = { type: "array", title: "Dag Runs", }, - structure: { - $ref: "#/components/schemas/StructureDataResponse", - }, }, type: "object", - required: ["dag_runs", "structure"], + required: ["dag_runs"], title: "GridResponse", description: "Response model for the Grid UI.", } as const; +export const $GridRunsResponse = { + properties: { + dag_id: { + type: "string", + title: "Dag Id", + }, + run_id: { + type: "string", + title: "Run Id", + }, + queued_at: { + anyOf: [ + { + type: "string", + format: "date-time", + }, + { + type: "null", + }, + ], + title: "Queued At", + }, + start_date: { + anyOf: [ + { + type: "string", + format: "date-time", + }, + { + type: "null", + }, + ], + title: "Start Date", + }, + end_date: { + anyOf: [ + { + type: "string", + format: "date-time", + }, + { + type: "null", + }, + ], + title: "End Date", + }, + run_after: { + type: "string", + format: "date-time", + title: "Run After", + }, + state: { + anyOf: [ + { + $ref: "#/components/schemas/TaskInstanceState", + }, + { + type: "null", + }, + ], + }, + run_type: { + $ref: "#/components/schemas/DagRunType", + }, + duration: { + anyOf: [ + { + type: "integer", + }, + { + type: "null", + }, + ], + title: "Duration", + readOnly: true, + }, + }, + type: "object", + required: [ + "dag_id", + "run_id", + "queued_at", + "start_date", + "end_date", + "run_after", + "state", + "run_type", + "duration", + ], + title: "GridRunsResponse", + description: "Base Node serializer for responses.", +} as const; + +export const $GridTISummaries = { + properties: { + run_id: { + type: "string", + title: "Run Id", + }, + dag_id: { + type: "string", + title: "Dag Id", + }, + task_instances: { + items: { + $ref: "#/components/schemas/LightGridTaskInstanceSummary", + }, + type: "array", + title: "Task Instances", + }, + }, + type: "object", + required: ["run_id", "dag_id", "task_instances"], + title: "GridTISummaries", + description: "DAG Run model for the Grid UI.", +} as const; + export const $GridTaskInstanceSummary = { properties: { task_id: { @@ -6685,6 +6853,55 @@ export const $HistoricalMetricDataResponse = { description: "Historical Metric Data serializer for responses.", } as 
const; +export const $LatestRunResponse = { + properties: { + id: { + type: "integer", + title: "Id", + }, + dag_id: { + type: "string", + title: "Dag Id", + }, + run_id: { + type: "string", + title: "Run Id", + }, + run_after: { + type: "string", + format: "date-time", + title: "Run After", + }, + }, + type: "object", + required: ["id", "dag_id", "run_id", "run_after"], + title: "LatestRunResponse", + description: "Base Node serializer for responses.", +} as const; + +export const $LightGridTaskInstanceSummary = { + properties: { + task_id: { + type: "string", + title: "Task Id", + }, + state: { + anyOf: [ + { + $ref: "#/components/schemas/TaskInstanceState", + }, + { + type: "null", + }, + ], + }, + }, + type: "object", + required: ["task_id", "state"], + title: "LightGridTaskInstanceSummary", + description: "Task Instance Summary model for the Grid UI.", +} as const; + export const $MenuItem = { type: "string", enum: [ diff --git a/airflow-core/src/airflow/ui/openapi-gen/requests/services.gen.ts b/airflow-core/src/airflow/ui/openapi-gen/requests/services.gen.ts index cb7cf86d49c82..77cd149a68c12 100644 --- a/airflow-core/src/airflow/ui/openapi-gen/requests/services.gen.ts +++ b/airflow-core/src/airflow/ui/openapi-gen/requests/services.gen.ts @@ -216,6 +216,14 @@ import type { StructureDataResponse2, GridDataData, GridDataResponse, + GetDagStructureData, + GetDagStructureResponse, + GetGridRunsData, + GetGridRunsResponse, + GetGridTiSummariesData, + GetGridTiSummariesResponse, + GetLatestRunData, + GetLatestRunResponse, } from "./types.gen"; export class AssetService { @@ -3594,4 +3602,137 @@ export class GridService { }, }); } + + /** + * Get Dag Structure + * Return dag structure for grid view. + * @param data The data for the request. + * @param data.dagId + * @param data.offset + * @param data.limit + * @param data.orderBy + * @param data.runAfterGte + * @param data.runAfterLte + * @returns GridNodeResponse Successful Response + * @throws ApiError + */ + public static getDagStructure(data: GetDagStructureData): CancelablePromise { + return __request(OpenAPI, { + method: "GET", + url: "/ui/grid/structure/{dag_id}", + path: { + dag_id: data.dagId, + }, + query: { + offset: data.offset, + limit: data.limit, + order_by: data.orderBy, + run_after_gte: data.runAfterGte, + run_after_lte: data.runAfterLte, + }, + errors: { + 400: "Bad Request", + 404: "Not Found", + 422: "Validation Error", + }, + }); + } + + /** + * Get Grid Runs + * Get info about a run for the grid. + * @param data The data for the request. + * @param data.dagId + * @param data.offset + * @param data.limit + * @param data.orderBy + * @param data.runAfterGte + * @param data.runAfterLte + * @returns GridRunsResponse Successful Response + * @throws ApiError + */ + public static getGridRuns(data: GetGridRunsData): CancelablePromise { + return __request(OpenAPI, { + method: "GET", + url: "/ui/grid/runs/{dag_id}", + path: { + dag_id: data.dagId, + }, + query: { + offset: data.offset, + limit: data.limit, + order_by: data.orderBy, + run_after_gte: data.runAfterGte, + run_after_lte: data.runAfterLte, + }, + errors: { + 400: "Bad Request", + 404: "Not Found", + 422: "Validation Error", + }, + }); + } + + /** + * Get Grid Ti Summaries + * Get states for TIs / "groups" of TIs. + * + * Essentially this is to know what color to put in the squares in the grid. + * + * The tricky part here is that we aggregate the state for groups and mapped tasks. 
+ * + * We don't add all the TIs for mapped TIs -- we only add one entry for the mapped task and + * its state is an aggregate of its TI states. + * + * And for task groups, we add a "task" for that which is not really a task but is just + * an entry that represents the group (so that we can show a filled in box when the group + * is not expanded) and its state is an agg of those within it. + * @param data The data for the request. + * @param data.dagId + * @param data.runId + * @returns GridTISummaries Successful Response + * @throws ApiError + */ + public static getGridTiSummaries( + data: GetGridTiSummariesData, + ): CancelablePromise { + return __request(OpenAPI, { + method: "GET", + url: "/ui/grid/ti_summaries/{dag_id}/{run_id}", + path: { + dag_id: data.dagId, + run_id: data.runId, + }, + errors: { + 400: "Bad Request", + 404: "Not Found", + 422: "Validation Error", + }, + }); + } + + /** + * Get Latest Run + * Get information about the latest dag run by run_after. + * + * This is used by the UI to figure out if it needs to rerun queries and resume auto refresh. + * @param data The data for the request. + * @param data.dagId + * @returns unknown Successful Response + * @throws ApiError + */ + public static getLatestRun(data: GetLatestRunData): CancelablePromise { + return __request(OpenAPI, { + method: "GET", + url: "/ui/grid/latest_run/{dag_id}", + path: { + dag_id: data.dagId, + }, + errors: { + 400: "Bad Request", + 404: "Not Found", + 422: "Validation Error", + }, + }); + } } diff --git a/airflow-core/src/airflow/ui/openapi-gen/requests/types.gen.ts b/airflow-core/src/airflow/ui/openapi-gen/requests/types.gen.ts index b04ce36ef78d4..0de72f72ce5f0 100644 --- a/airflow-core/src/airflow/ui/openapi-gen/requests/types.gen.ts +++ b/airflow-core/src/airflow/ui/openapi-gen/requests/types.gen.ts @@ -1604,12 +1604,46 @@ export type GridDAGRunwithTIs = { task_instances: Array; }; +/** + * Base Node serializer for responses. + */ +export type GridNodeResponse = { + id: string; + label: string; + children?: Array | null; + is_mapped: boolean | null; + setup_teardown_type?: "setup" | "teardown" | null; +}; + /** * Response model for the Grid UI. */ export type GridResponse = { dag_runs: Array; - structure: StructureDataResponse; +}; + +/** + * Base Node serializer for responses. + */ +export type GridRunsResponse = { + dag_id: string; + run_id: string; + queued_at: string | null; + start_date: string | null; + end_date: string | null; + run_after: string; + state: TaskInstanceState | null; + run_type: DagRunType; + readonly duration: number | null; +}; + +/** + * DAG Run model for the Grid UI. + */ +export type GridTISummaries = { + run_id: string; + dag_id: string; + task_instances: Array; }; /** @@ -1638,6 +1672,24 @@ export type HistoricalMetricDataResponse = { task_instance_states: TaskInstanceStateCount; }; +/** + * Base Node serializer for responses. + */ +export type LatestRunResponse = { + id: number; + dag_id: string; + run_id: string; + run_after: string; +}; + +/** + * Task Instance Summary model for the Grid UI. + */ +export type LightGridTaskInstanceSummary = { + task_id: string; + state: TaskInstanceState | null; +}; + /** * Define all menu items defined in the menu. 
*/ @@ -2669,6 +2721,41 @@ export type GridDataData = { export type GridDataResponse = GridResponse; +export type GetDagStructureData = { + dagId: string; + limit?: number; + offset?: number; + orderBy?: string; + runAfterGte?: string | null; + runAfterLte?: string | null; +}; + +export type GetDagStructureResponse = Array; + +export type GetGridRunsData = { + dagId: string; + limit?: number; + offset?: number; + orderBy?: string; + runAfterGte?: string | null; + runAfterLte?: string | null; +}; + +export type GetGridRunsResponse = Array; + +export type GetGridTiSummariesData = { + dagId: string; + runId: string; +}; + +export type GetGridTiSummariesResponse = GridTISummaries; + +export type GetLatestRunData = { + dagId: string; +}; + +export type GetLatestRunResponse = LatestRunResponse | null; + export type $OpenApiTs = { "/api/v2/assets": { get: { @@ -5505,4 +5592,96 @@ export type $OpenApiTs = { }; }; }; + "/ui/grid/structure/{dag_id}": { + get: { + req: GetDagStructureData; + res: { + /** + * Successful Response + */ + 200: Array; + /** + * Bad Request + */ + 400: HTTPExceptionResponse; + /** + * Not Found + */ + 404: HTTPExceptionResponse; + /** + * Validation Error + */ + 422: HTTPValidationError; + }; + }; + }; + "/ui/grid/runs/{dag_id}": { + get: { + req: GetGridRunsData; + res: { + /** + * Successful Response + */ + 200: Array; + /** + * Bad Request + */ + 400: HTTPExceptionResponse; + /** + * Not Found + */ + 404: HTTPExceptionResponse; + /** + * Validation Error + */ + 422: HTTPValidationError; + }; + }; + }; + "/ui/grid/ti_summaries/{dag_id}/{run_id}": { + get: { + req: GetGridTiSummariesData; + res: { + /** + * Successful Response + */ + 200: GridTISummaries; + /** + * Bad Request + */ + 400: HTTPExceptionResponse; + /** + * Not Found + */ + 404: HTTPExceptionResponse; + /** + * Validation Error + */ + 422: HTTPValidationError; + }; + }; + }; + "/ui/grid/latest_run/{dag_id}": { + get: { + req: GetLatestRunData; + res: { + /** + * Successful Response + */ + 200: LatestRunResponse | null; + /** + * Bad Request + */ + 400: HTTPExceptionResponse; + /** + * Not Found + */ + 404: HTTPExceptionResponse; + /** + * Validation Error + */ + 422: HTTPValidationError; + }; + }; + }; }; diff --git a/airflow-core/src/airflow/ui/openapi-merge.json b/airflow-core/src/airflow/ui/openapi-merge.json index 7b4bcb75ca15f..059cadf6fd507 100644 --- a/airflow-core/src/airflow/ui/openapi-merge.json +++ b/airflow-core/src/airflow/ui/openapi-merge.json @@ -1,7 +1,7 @@ { "inputs": [ { - "inputFile": "../api_fastapi/core_api/openapi/v1-rest-api-generated.yaml" + "inputFile": "../api_fastapi/core_api/openapi/v2-rest-api-generated.yaml" }, { "inputFile": "../api_fastapi/core_api/openapi/_private_ui.yaml" diff --git a/airflow-core/src/airflow/ui/src/components/Clear/TaskInstance/ClearTaskInstanceDialog.tsx b/airflow-core/src/airflow/ui/src/components/Clear/TaskInstance/ClearTaskInstanceDialog.tsx index 3d251138ff15b..e8202304281ac 100644 --- a/airflow-core/src/airflow/ui/src/components/Clear/TaskInstance/ClearTaskInstanceDialog.tsx +++ b/airflow-core/src/airflow/ui/src/components/Clear/TaskInstance/ClearTaskInstanceDialog.tsx @@ -103,6 +103,7 @@ const ClearTaskInstanceDialog = ({ onClose, open, taskInstance }: Props) => { { return values === undefined ? 
0 : values.reduce((initial, next) => initial + next, 0) / values.length; }; -type RunResponse = DAGRunResponse | TaskInstanceResponse; +type RunResponse = GridRunsResponse | TaskInstanceResponse; const getDuration = (start: string, end: string | null) => dayjs.duration(dayjs(end).diff(start)).asSeconds(); @@ -108,7 +108,7 @@ export const DurationChart = ({ data: entries.map((entry: RunResponse) => { switch (kind) { case "Dag Run": { - const run = entry as DAGRunResponse; + const run = entry as GridRunsResponse; return run.queued_at !== null && run.start_date !== null && run.queued_at < run.start_date ? Number(getDuration(run.queued_at, run.start_date)) @@ -151,18 +151,19 @@ export const DurationChart = ({ return; } - const entry = entries[element.index]; - const baseUrl = `/dags/${entry?.dag_id}/runs/${entry?.dag_run_id}`; - switch (kind) { case "Dag Run": { + const entry = entries[element.index] as GridRunsResponse | undefined; + const baseUrl = `/dags/${entry?.dag_id}/runs/${entry?.run_id}`; + navigate(baseUrl); break; } case "Task Instance": { - const taskInstance = entry as TaskInstanceResponse; + const entry = entries[element.index] as TaskInstanceResponse | undefined; + const baseUrl = `/dags/${entry?.dag_id}/runs/${entry?.dag_run_id}`; - navigate(`${baseUrl}/tasks/${taskInstance.task_id}`); + navigate(`${baseUrl}/tasks/${entry?.task_id}`); break; } default: diff --git a/airflow-core/src/airflow/ui/src/components/MarkAs/TaskInstance/MarkTaskInstanceAsDialog.tsx b/airflow-core/src/airflow/ui/src/components/MarkAs/TaskInstance/MarkTaskInstanceAsDialog.tsx index b22992d39f973..8411be4407668 100644 --- a/airflow-core/src/airflow/ui/src/components/MarkAs/TaskInstance/MarkTaskInstanceAsDialog.tsx +++ b/airflow-core/src/airflow/ui/src/components/MarkAs/TaskInstance/MarkTaskInstanceAsDialog.tsx @@ -98,6 +98,7 @@ const MarkTaskInstanceAsDialog = ({ onClose, open, state, taskInstance }: Props) - {index} - , - ); - if (Boolean(timestamp)) { elements.push("[", - + ); }; diff --git a/airflow-core/src/airflow/ui/src/layouts/Details/Grid/Grid.tsx b/airflow-core/src/airflow/ui/src/layouts/Details/Grid/Grid.tsx index 75fe7209dc8e6..d4e69ba1a7006 100644 --- a/airflow-core/src/airflow/ui/src/layouts/Details/Grid/Grid.tsx +++ b/airflow-core/src/airflow/ui/src/layouts/Details/Grid/Grid.tsx @@ -19,18 +19,21 @@ import { Box, Flex, IconButton } from "@chakra-ui/react"; import dayjs from "dayjs"; import dayjsDuration from "dayjs/plugin/duration"; -import { useMemo } from "react"; +import { useEffect, useMemo, useState } from "react"; import { FiChevronsRight } from "react-icons/fi"; import { Link, useParams } from "react-router-dom"; +import type { GridRunsResponse } from "openapi/requests"; import { useOpenGroups } from "src/context/openGroups"; -import { useGrid } from "src/queries/useGrid"; +import { useGridRuns } from "src/queries/useGridRuns.ts"; +import { useGridStructure } from "src/queries/useGridStructure.ts"; +import { isStatePending } from "src/utils"; import { Bar } from "./Bar"; import { DurationAxis } from "./DurationAxis"; import { DurationTick } from "./DurationTick"; import { TaskNames } from "./TaskNames"; -import { flattenNodes, type RunWithDuration } from "./utils"; +import { flattenNodes } from "./utils"; dayjs.extend(dayjsDuration); @@ -39,36 +42,46 @@ type Props = { }; export const Grid = ({ limit }: Props) => { + const [selectedIsVisible, setSelectedIsVisible] = useState(); + const [hasActiveRun, setHasActiveRun] = useState(); const { openGroupIds } = useOpenGroups(); - const { dagId 
= "" } = useParams(); + const { dagId = "", runId = "" } = useParams(); - const { data: gridData, isLoading, runAfter } = useGrid(limit); + const { data: gridRuns, isLoading } = useGridRuns({ limit }); - const runs: Array = useMemo( - () => - (gridData?.dag_runs ?? []).map((run) => { - const duration = dayjs - .duration(dayjs(run.end_date ?? undefined).diff(run.start_date ?? undefined)) - .asSeconds(); + // Check if the selected dag run is inside of the grid response, if not, we'll update the grid filters + // Eventually we should redo the api endpoint to make this work better + useEffect(() => { + if (gridRuns && runId) { + const run = gridRuns.find((dr: GridRunsResponse) => dr.run_id === runId); - return { - ...run, - duration, - }; - }), - [gridData?.dag_runs], - ); + if (!run) { + setSelectedIsVisible(false); + } + } + }, [runId, gridRuns, selectedIsVisible, setSelectedIsVisible]); + + useEffect(() => { + if (gridRuns) { + const run = gridRuns.some((dr: GridRunsResponse) => isStatePending(dr.state)); + + if (!run) { + setHasActiveRun(false); + } + } + }, [gridRuns, setHasActiveRun]); + const { data: dagStructure } = useGridStructure({ hasActiveRun, limit }); // calculate dag run bar heights relative to max const max = Math.max.apply( undefined, - runs.map((dr) => dr.duration), - ); - - const { flatNodes } = useMemo( - () => flattenNodes(gridData === undefined ? [] : gridData.structure.nodes, openGroupIds), - [gridData, openGroupIds], + gridRuns === undefined + ? [] + : gridRuns + .map((dr: GridRunsResponse) => dr.duration) + .filter((duration: number | null): duration is number => duration !== null), ); + const { flatNodes } = useMemo(() => flattenNodes(dagStructure, openGroupIds), [dagStructure, openGroupIds]); return ( @@ -81,7 +94,7 @@ export const Grid = ({ limit }: Props) => { - {Boolean(runs.length) && ( + {Boolean(gridRuns?.length) && ( <> {Math.floor(max)}s {Math.floor(max / 2)}s @@ -90,11 +103,11 @@ export const Grid = ({ limit }: Props) => { )} - {runs.map((dr) => ( - + {gridRuns?.map((dr: GridRunsResponse) => ( + ))} - {runAfter === undefined ? undefined : ( + {selectedIsVisible === undefined || !selectedIsVisible ? undefined : ( ; readonly runId: string; - readonly taskInstances: Array; + readonly taskInstances: Array; }; export const TaskInstancesColumn = ({ nodes, runId, taskInstances }: Props) => { @@ -37,6 +37,7 @@ export const TaskInstancesColumn = ({ nodes, runId, taskInstances }: Props) => { const search = searchParams.toString(); return nodes.map((node) => { + // todo: how does this work with mapped? same task id for multiple tis const taskInstance = taskInstances.find((ti) => ti.task_id === node.id); if (!taskInstance) { diff --git a/airflow-core/src/airflow/ui/src/layouts/Details/Grid/utils.ts b/airflow-core/src/airflow/ui/src/layouts/Details/Grid/utils.ts index 4b17098faff63..d278597267a14 100644 --- a/airflow-core/src/airflow/ui/src/layouts/Details/Grid/utils.ts +++ b/airflow-core/src/airflow/ui/src/layouts/Details/Grid/utils.ts @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. 
*/ -import type { GridDAGRunwithTIs, NodeResponse } from "openapi/requests/types.gen"; +import type { GridDAGRunwithTIs, GridNodeResponse } from "openapi/requests/types.gen"; export type RunWithDuration = { duration: number; @@ -26,31 +26,33 @@ export type GridTask = { depth: number; isGroup?: boolean; isOpen?: boolean; -} & NodeResponse; +} & GridNodeResponse; -export const flattenNodes = (nodes: Array, openGroupIds: Array, depth: number = 0) => { +export const flattenNodes = ( + nodes: Array | undefined, + openGroupIds: Array, + depth: number = 0, +) => { let flatNodes: Array = []; let allGroupIds: Array = []; - nodes.forEach((node) => { - if (node.type === "task") { - if (node.children) { - const { children, ...rest } = node; - - flatNodes.push({ ...rest, depth, isGroup: true, isOpen: openGroupIds.includes(node.id) }); - allGroupIds.push(node.id); - - const { allGroupIds: childGroupIds, flatNodes: childNodes } = flattenNodes( - children, - openGroupIds, - depth + 1, - ); - - flatNodes = [...flatNodes, ...(openGroupIds.includes(node.id) ? childNodes : [])]; - allGroupIds = [...allGroupIds, ...childGroupIds]; - } else { - flatNodes.push({ ...node, depth }); - } + nodes?.forEach((node) => { + if (node.children) { + const { children, ...rest } = node; + + flatNodes.push({ ...rest, depth, isGroup: true, isOpen: openGroupIds.includes(node.id) }); + allGroupIds.push(node.id); + + const { allGroupIds: childGroupIds, flatNodes: childNodes } = flattenNodes( + children, + openGroupIds, + depth + 1, + ); + + flatNodes = [...flatNodes, ...(openGroupIds.includes(node.id) ? childNodes : [])]; + allGroupIds = [...allGroupIds, ...childGroupIds]; + } else { + flatNodes.push({ ...node, depth }); } }); diff --git a/airflow-core/src/airflow/ui/src/layouts/Details/ToggleGroups.tsx b/airflow-core/src/airflow/ui/src/layouts/Details/ToggleGroups.tsx index 8b2ebcefe2d88..cb740cc9e9831 100644 --- a/airflow-core/src/airflow/ui/src/layouts/Details/ToggleGroups.tsx +++ b/airflow-core/src/airflow/ui/src/layouts/Details/ToggleGroups.tsx @@ -20,22 +20,21 @@ import { type ButtonGroupProps, IconButton, ButtonGroup } from "@chakra-ui/react import { useMemo } from "react"; import { MdExpand, MdCompress } from "react-icons/md"; import { useParams } from "react-router-dom"; +import { useLocalStorage } from "usehooks-ts"; -import { useStructureServiceStructureData } from "openapi/queries"; import { useOpenGroups } from "src/context/openGroups"; +import { useGridStructure } from "src/queries/useGridStructure.ts"; import { flattenNodes } from "./Grid/utils"; export const ToggleGroups = (props: ButtonGroupProps) => { - const { dagId = "" } = useParams(); - const { data: structure } = useStructureServiceStructureData({ - dagId, - }); const { openGroupIds, setOpenGroupIds } = useOpenGroups(); - + const { dagId = "" } = useParams(); + const [limit] = useLocalStorage(`dag_runs_limit-${dagId}`, 10); + const { data: dagStructure } = useGridStructure({ limit }); const { allGroupIds } = useMemo( - () => flattenNodes(structure?.nodes ?? 
[], openGroupIds), - [structure?.nodes, openGroupIds], + () => flattenNodes(dagStructure, openGroupIds), + [dagStructure, openGroupIds], ); // Don't show button if the DAG has no task groups diff --git a/airflow-core/src/airflow/ui/src/layouts/Nav/TimezoneSelector.tsx b/airflow-core/src/airflow/ui/src/layouts/Nav/TimezoneSelector.tsx index 20dd09af396de..d7cc615b874aa 100644 --- a/airflow-core/src/airflow/ui/src/layouts/Nav/TimezoneSelector.tsx +++ b/airflow-core/src/airflow/ui/src/layouts/Nav/TimezoneSelector.tsx @@ -21,7 +21,7 @@ import { Select, type SingleValue } from "chakra-react-select"; import dayjs from "dayjs"; import timezone from "dayjs/plugin/timezone"; import utc from "dayjs/plugin/utc"; -import React, { useMemo } from "react"; +import React, { useEffect, useMemo, useState } from "react"; import { useTimezone } from "src/context/timezone"; import type { Option as TimezoneOption } from "src/utils/option"; @@ -31,6 +31,7 @@ dayjs.extend(timezone); const TimezoneSelector: React.FC = () => { const { selectedTimezone, setSelectedTimezone } = useTimezone(); + const [currentTime, setCurrentTime] = useState(""); const timezones = useMemo>(() => { const tzList = Intl.supportedValuesOf("timeZone"); const guessedTz = dayjs.tz.guess(); @@ -54,7 +55,17 @@ const TimezoneSelector: React.FC = () => { } }; - const currentTime = dayjs().tz(selectedTimezone).format("YYYY-MM-DD HH:mm:ss"); + useEffect(() => { + const updateTime = () => { + setCurrentTime(dayjs().tz(selectedTimezone).format("YYYY-MM-DD HH:mm:ss")); + }; + + updateTime(); + + const interval = setInterval(updateTime, 1000); + + return () => clearInterval(interval); + }, [selectedTimezone]); return ( diff --git a/airflow-core/src/airflow/ui/src/layouts/Nav/UserSettingsButton.tsx b/airflow-core/src/airflow/ui/src/layouts/Nav/UserSettingsButton.tsx index 91184f2951173..fc035a2b66994 100644 --- a/airflow-core/src/airflow/ui/src/layouts/Nav/UserSettingsButton.tsx +++ b/airflow-core/src/airflow/ui/src/layouts/Nav/UserSettingsButton.tsx @@ -20,7 +20,7 @@ import { useDisclosure } from "@chakra-ui/react"; import dayjs from "dayjs"; import timezone from "dayjs/plugin/timezone"; import utc from "dayjs/plugin/utc"; -import { useState } from "react"; +import { useEffect, useState } from "react"; import { FiClock, FiGrid, FiLogOut, FiMoon, FiSun, FiUser } from "react-icons/fi"; import { MdOutlineAccountTree } from "react-icons/md"; import { useLocalStorage } from "usehooks-ts"; @@ -45,8 +45,20 @@ export const UserSettingsButton = () => { const [time, setTime] = useState(dayjs()); + useEffect(() => { + const updateTime = () => { + setTime(dayjs()); + }; + + updateTime(); + + const interval = setInterval(updateTime, 1000); + + return () => clearInterval(interval); + }, [selectedTimezone]); + return ( - setTime(dayjs())} positioning={{ placement: "right" }}> + } title="User" /> diff --git a/airflow-core/src/airflow/ui/src/pages/Connections/ConnectionForm.tsx b/airflow-core/src/airflow/ui/src/pages/Connections/ConnectionForm.tsx index 6a17f1794f6bf..8452e738445e4 100644 --- a/airflow-core/src/airflow/ui/src/pages/Connections/ConnectionForm.tsx +++ b/airflow-core/src/airflow/ui/src/pages/Connections/ConnectionForm.tsx @@ -49,13 +49,14 @@ const ConnectionForm = ({ const [errors, setErrors] = useState<{ conf?: string }>({}); const { formattedData: connectionTypeMeta, + hookNames: hookNameMap, isPending: isMetaPending, keysList: connectionTypes, } = useConnectionTypeMeta(); const { conf: extra, setConf } = useParamStore(); const { control, - formState: 
{ isValid }, + formState: { isDirty, isValid }, handleSubmit, reset, watch, @@ -91,6 +92,14 @@ const ConnectionForm = ({ mutateConnection(data); }; + const hasChanges = () => { + if (isDirty) { + return true; + } + + return JSON.stringify(JSON.parse(extra)) !== JSON.stringify(JSON.parse(initialConnection.extra)); + }; + const validateAndPrettifyJson = (value: string) => { try { const parsedJson = JSON.parse(value) as JSON; @@ -116,7 +125,7 @@ const ConnectionForm = ({ }; const connTypesOptions = connectionTypes.map((conn) => ({ - label: conn, + label: hookNameMap[conn], value: conn, })); @@ -231,7 +240,7 @@ const ConnectionForm = ({