diff --git a/.github/workflows/ci-image-checks.yml b/.github/workflows/ci-image-checks.yml index 116e4095a7d2d..8e1fc2991ca10 100644 --- a/.github/workflows/ci-image-checks.yml +++ b/.github/workflows/ci-image-checks.yml @@ -303,7 +303,7 @@ jobs: publish-docs: timeout-minutes: 150 - name: "Publish documentation" + name: "Publish documentation and validate versions" permissions: id-token: write contents: read @@ -317,9 +317,6 @@ jobs: INCLUDE_SUCCESS_OUTPUTS: "${{ inputs.include-success-outputs }}" PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" VERBOSE: "true" - if: > - inputs.canary-run == 'true' && - (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') steps: - name: "Cleanup repo" shell: bash @@ -363,6 +360,11 @@ jobs: run: breeze release-management add-back-references docker-stack - name: "Generate back references for helm-chart" run: breeze release-management add-back-references helm-chart + - name: "Validate published doc versions" + id: validate-docs-versions + run: cd ./dev/breeze && uv run ./src/airflow_breeze/utils/docs_version_validation.py + env: + AIRFLOW_SITE_DIRECTORY: /mnt/airflow-site/airflow-site - name: Install AWS CLI v2 run: | curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o /tmp/awscliv2.zip @@ -370,14 +372,23 @@ jobs: rm /tmp/awscliv2.zip sudo /tmp/aws/install --update rm -rf /tmp/aws/ + if: > + inputs.canary-run == 'true' && + (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a # v4.0.1 with: aws-access-key-id: ${{ secrets.DOCS_AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.DOCS_AWS_SECRET_ACCESS_KEY }} aws-region: eu-central-1 + if: > + inputs.canary-run == 'true' && + (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') - name: "Upload documentation to AWS S3" run: aws s3 sync --delete 
./generated/_build s3://apache-airflow-docs + if: > + inputs.canary-run == 'true' && + (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') test-python-api-client: timeout-minutes: 60 diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index 4fcb9c261a7cc..52a3b6ae51aca 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -219,6 +219,8 @@ "s3://staging-docs-airflow-apache-org/docs/", ] +PACKAGES_METADATA_EXCLUDE_NAMES = ["docker-stack", "apache-airflow-providers"] + @clearable_cache def all_selective_core_test_types() -> tuple[str, ...]: diff --git a/dev/breeze/src/airflow_breeze/utils/docs_version_validation.py b/dev/breeze/src/airflow_breeze/utils/docs_version_validation.py new file mode 100755 index 0000000000000..4cf388faf360b --- /dev/null +++ b/dev/breeze/src/airflow_breeze/utils/docs_version_validation.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations
+
+import json
+import os
+import sys
+from pathlib import Path
+
+from rich.console import Console
+
+from airflow_breeze.global_constants import PACKAGES_METADATA_EXCLUDE_NAMES
+
+console = Console(color_system="standard")
+
+AIRFLOW_SITE_DIRECTORY = os.environ.get("AIRFLOW_SITE_DIRECTORY")
+
+error_versions: list[str] = []
+
+if AIRFLOW_SITE_DIRECTORY and "docs-archive" not in AIRFLOW_SITE_DIRECTORY:
+    AIRFLOW_SITE_DIRECTORY = os.path.join(Path(AIRFLOW_SITE_DIRECTORY), "docs-archive")
+
+
+def validate_docs_version() -> None:
+    """Validate the versions of documentation packages in the specified directory.
+
+    This script checks the versions of documentation packages in the published directory when we
+    publish and add back-references to the documentation. The directory is expected to be structured like:
+    docs-archive/
+        apache-airflow/
+            1.10.0/
+            stable/
+            stable.txt
+        apache-airflow-providers-standard/
+            2.0.0/
+            stable/
+            stable.txt
+
+    Anything outside the expected structure causes errors in redirect URLs or when publishing the docs to S3.
+    Exits with status 1 if any version folder name is invalid; an empty archive is only reported.
+    """
+    doc_packages = os.listdir(AIRFLOW_SITE_DIRECTORY)
+
+    if not doc_packages:
+        console.print("[red]No documentation packages found in the specified directory.[/red]")
+        return
+
+    package_version_map: dict[str, list[str]] = {}
+
+    for package in doc_packages:
+        if package in PACKAGES_METADATA_EXCLUDE_NAMES:
+            console.print(f"[yellow]Skipping excluded package: {package}[/yellow]")
+            continue
+        # Every entry other than the "stable" aliases is treated as a version folder name.
+        package_path = os.path.join(str(AIRFLOW_SITE_DIRECTORY), package)
+        versions = [v for v in os.listdir(package_path) if v not in ("stable", "stable.txt")]
+        if versions:
+            package_version_map[package] = get_all_versions(package, versions)
+
+    if error_versions:
+        console.print("[red]Errors found in version validation:[/red]")
+        for error in error_versions:
+            console.print(f"[red]{error}[/red]")
+        console.print(
+            "[blue]These errors could be due to invalid redirects present in the doc packages.[/blue]"
+        )
+        sys.exit(1)
+
+    console.print("[green]All versions validated successfully![/green]")
+    console.print(f"[blue] {json.dumps(package_version_map, indent=2)} [/blue]")
+
+
+def get_all_versions(package_name: str, versions: list[str]) -> list[str]:
+    """Return the parseable entries of ``versions`` in ascending order; record the others as errors."""
+    from packaging.version import Version
+
+    good_versions = []
+    for version in versions:
+        try:
+            Version(version)
+            good_versions.append(version)
+        except ValueError as e:
+            # packaging's InvalidVersion is a ValueError subclass; keep its message for the report.
+            error_versions.append(f"{e} found under doc folder {package_name}")
+    # Sort by parsed Version, not lexically (a plain string sort would put "1.10.0" before "1.9.0").
+    return sorted(good_versions, key=Version)
+
+
+if __name__ == "__main__":
+    console.print("[blue]Validating documentation versions...[/blue]")
+
+    if AIRFLOW_SITE_DIRECTORY is None:
+        console.print(
+            "[red]AIRFLOW_SITE_DIRECTORY environment variable is not set. "
+            "Please set it to the directory containing the Airflow site files.[/red]"
+        )
+        sys.exit(1)
+
+    validate_docs_version()
diff --git a/dev/breeze/src/airflow_breeze/utils/publish_docs_to_s3.py b/dev/breeze/src/airflow_breeze/utils/publish_docs_to_s3.py
index 8951da8c1b2e4..4c315bbbe8710 100644
--- a/dev/breeze/src/airflow_breeze/utils/publish_docs_to_s3.py
+++ b/dev/breeze/src/airflow_breeze/utils/publish_docs_to_s3.py
@@ -24,6 +24,7 @@
 
 import boto3
 
+from airflow_breeze.global_constants import PACKAGES_METADATA_EXCLUDE_NAMES
 from airflow_breeze.utils.console import get_console
 from airflow_breeze.utils.parallel import check_async_run_results, run_with_pool
 
@@ -31,7 +32,6 @@
 
 NON_SHORT_NAME_PACKAGES = ["docker-stack", "helm-chart", "apache-airflow", "task-sdk"]
 
-PACKAGES_METADATA_EXCLUDE_NAMES = ["docker-stack", "apache-airflow-providers"]
 
 s3_client = boto3.client("s3")
 cloudfront_client = boto3.client("cloudfront")
diff --git a/dev/breeze/tests/test_docs_version_validation.py b/dev/breeze/tests/test_docs_version_validation.py
new file mode 100644
index 0000000000000..504ba8733794c
--- /dev/null
+++ b/dev/breeze/tests/test_docs_version_validation.py
@@ -0,0 +1,56 @@
+# Licensed to
the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import os
+from unittest.mock import patch
+
+import pytest
+
+from airflow_breeze.utils.docs_version_validation import error_versions, validate_docs_version
+
+
+class TestValidateDocsVersion:  # validate_docs_version() driven by mocked os.listdir results
+    def setup_method(self):  # NOTE(review): the env var is read at module import; confirm this has an effect
+        os.environ["AIRFLOW_SITE_DIRECTORY"] = "/path/to/docs-archive"
+        error_versions.clear()  # module-level list shared between tests; reset it before each one
+
+    @patch("os.listdir")
+    @patch("os.path.join")
+    def test_validate_docs_version_with_invalid_versions(self, mock_path_join, mock_listdir):
+        mock_listdir.side_effect = [
+            ["apache-airflow", "apache-airflow-providers-google"],  # 1st call: package folders
+            ["1.10.0", "stable", "invalid_version"],  # 2nd call: apache-airflow entries
+            ["2.0.0", "stable", "stable.txt"],  # 3rd call: provider package entries
+        ]
+        mock_path_join.side_effect = lambda *args: "/".join(args)
+        # validate_docs_version() calls sys.exit(1) once error_versions is non-empty.
+        with pytest.raises(SystemExit):
+            validate_docs_version()
+        assert "Invalid version: 'invalid_version' found under doc folder apache-airflow" in error_versions
+
+    @patch("os.listdir")
+    @patch("os.path.join")
+    def test_validate_docs_version_with_valid_versions(self, mock_path_join, mock_listdir):
+        mock_listdir.side_effect = [
+            ["apache-airflow", "apache-airflow-providers-standard"],  # 1st call: package folders
+            ["1.10.0", "stable"],  # 2nd call: apache-airflow entries
+            ["2.0.0", "stable", "stable.txt"],  # 3rd call: provider package entries
+        ]
+        mock_path_join.side_effect = lambda *args: "/".join(args)
+        validate_docs_version()
+        assert not error_versions, f"No errors should be found for valid versions, {error_versions}"