19 changes: 15 additions & 4 deletions .github/workflows/ci-image-checks.yml
@@ -303,7 +303,7 @@ jobs:

publish-docs:
timeout-minutes: 150
name: "Publish documentation"
name: "Publish documentation and validate versions"
permissions:
id-token: write
contents: read
@@ -317,9 +317,6 @@
INCLUDE_SUCCESS_OUTPUTS: "${{ inputs.include-success-outputs }}"
PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}"
VERBOSE: "true"
if: >
inputs.canary-run == 'true' &&
(github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
steps:
- name: "Cleanup repo"
shell: bash
@@ -363,21 +360,35 @@ jobs:
run: breeze release-management add-back-references docker-stack
- name: "Generate back references for helm-chart"
run: breeze release-management add-back-references helm-chart
- name: "Validate published doc versions"
id: validate-docs-versions
run: cd ./dev/breeze && uv run ./src/airflow_breeze/utils/docs_version_validation.py
env:
AIRFLOW_SITE_DIRECTORY: /mnt/airflow-site/airflow-site
- name: Install AWS CLI v2
run: |
curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o /tmp/awscliv2.zip
unzip -q /tmp/awscliv2.zip -d /tmp
rm /tmp/awscliv2.zip
sudo /tmp/aws/install --update
rm -rf /tmp/aws/
if: >
inputs.canary-run == 'true' &&
(github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a # v4.0.1
with:
aws-access-key-id: ${{ secrets.DOCS_AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.DOCS_AWS_SECRET_ACCESS_KEY }}
aws-region: eu-central-1
if: >
inputs.canary-run == 'true' &&
(github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
- name: "Upload documentation to AWS S3"
run: aws s3 sync --delete ./generated/_build s3://apache-airflow-docs
if: >
inputs.canary-run == 'true' &&
(github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')

test-python-api-client:
timeout-minutes: 60
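A minimal local sketch (not part of the PR) of what the new "Validate published doc versions" step does: it runs the validator from dev/breeze with AIRFLOW_SITE_DIRECTORY pointing at a checkout of the airflow-site repository. Using subprocess here and reusing the CI mount path /mnt/airflow-site/airflow-site are assumptions for illustration; it assumes `uv` is installed locally.

import os
import subprocess

env = dict(os.environ, AIRFLOW_SITE_DIRECTORY="/mnt/airflow-site/airflow-site")
subprocess.run(
    ["uv", "run", "./src/airflow_breeze/utils/docs_version_validation.py"],
    cwd="./dev/breeze",  # mirrors the `cd ./dev/breeze` in the workflow step
    env=env,
    check=True,  # a validation failure exits non-zero and raises CalledProcessError
)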
2 changes: 2 additions & 0 deletions dev/breeze/src/airflow_breeze/global_constants.py
@@ -219,6 +219,8 @@
"s3://staging-docs-airflow-apache-org/docs/",
]

PACKAGES_METADATA_EXCLUDE_NAMES = ["docker-stack", "apache-airflow-providers"]


@clearable_cache
def all_selective_core_test_types() -> tuple[str, ...]:
114 changes: 114 additions & 0 deletions dev/breeze/src/airflow_breeze/utils/docs_version_validation.py
@@ -0,0 +1,114 @@
#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

import json
import os
import sys
from pathlib import Path

from rich.console import Console

from airflow_breeze.global_constants import PACKAGES_METADATA_EXCLUDE_NAMES

console = Console(color_system="standard")

AIRFLOW_SITE_DIRECTORY = os.environ.get("AIRFLOW_SITE_DIRECTORY")

error_versions: list[str] = []

if AIRFLOW_SITE_DIRECTORY and "docs-archive" not in AIRFLOW_SITE_DIRECTORY:
AIRFLOW_SITE_DIRECTORY = os.path.join(Path(AIRFLOW_SITE_DIRECTORY), "docs-archive")


def validate_docs_version() -> None:
"""
Validate the versions of documentation packages in the specified directory.

This script checks the versions of documentation packages in the published directory
when we publish and add back-references to the documentation. the directory is expected to be structured like:
docs-archive/
apache-airflow/
1.10.0/
stable/
stable.txt
apache-airflow-providers-standard/
2.0.0/
stable/
stable.txt

If anything found apart from the expected structure, it will cause error to redirects urls or publishing the documentation to s3
"""
doc_packages = os.listdir(AIRFLOW_SITE_DIRECTORY)

if not doc_packages:
console.print("[red]No documentation packages found in the specified directory.[/red]")
return

package_version_map = {}

for package in doc_packages:
if package in PACKAGES_METADATA_EXCLUDE_NAMES:
console.print(f"[yellow]Skipping excluded package: {package}[/yellow]")
continue

package_path = os.path.join(str(AIRFLOW_SITE_DIRECTORY), package)
versions = [v for v in os.listdir(package_path) if v != "stable" and v != "stable.txt"]
if versions:
package_version_map[package] = get_all_versions(package, versions)

if error_versions:
console.print("[red]Errors found in version validation:[/red]")
for error in error_versions:
console.print(f"[red]{error}[/red]")
console.print(
"[blue]These errors could be due to invalid redirects present in the doc packages.[/blue]"
)
sys.exit(1)

console.print("[green]All versions validated successfully![/green]")
console.print(f"[blue] {json.dumps(package_version_map, indent=2)} [/blue]")


def get_all_versions(package_name: str, versions: list[str]) -> list[str]:
from packaging.version import Version

good_versions = []
for version in versions:
try:
Version(version)
good_versions.append(version)
except ValueError as e:
error_versions.append(f"{e} found under doc folder {package_name}")
return sorted(
good_versions,
key=lambda d: Version(d),
)


if __name__ == "__main__":
console.print("[blue]Validating documentation versions...[/blue]")

if AIRFLOW_SITE_DIRECTORY is None:
console.print(
"[red]AIRFLOW_SITE_DIRECTORY environment variable is not set. "
"Please set it to the directory containing the Airflow site files.[red]"
)
sys.exit(1)

validate_docs_version()
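As a rough illustration of the layout described in the docstring above (a sketch, not part of the PR): building a scratch docs-archive tree with one valid version folder and one non-version folder makes the validator report the bad folder and exit non-zero. This assumes the breeze sources and their dependencies (rich, packaging) are importable, and note that AIRFLOW_SITE_DIRECTORY must be set before the module is imported, since it is read at import time.

import os
import tempfile
from pathlib import Path

root = Path(tempfile.mkdtemp()) / "docs-archive"
# A valid PEP 440 version folder plus the "stable" marker, as in the docstring...
(root / "apache-airflow" / "1.10.0").mkdir(parents=True)
(root / "apache-airflow" / "stable").mkdir()
# ...and one folder name that is not a parseable version.
(root / "apache-airflow" / "not-a-version").mkdir()

os.environ["AIRFLOW_SITE_DIRECTORY"] = str(root)  # must be set before the import below

from airflow_breeze.utils.docs_version_validation import validate_docs_version

validate_docs_version()  # prints the offending folder and exits with status 1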
@@ -24,14 +24,14 @@

import boto3

from airflow_breeze.global_constants import PACKAGES_METADATA_EXCLUDE_NAMES
from airflow_breeze.utils.console import get_console
from airflow_breeze.utils.parallel import check_async_run_results, run_with_pool

PROVIDER_NAME_FORMAT = "apache-airflow-providers-{}"

NON_SHORT_NAME_PACKAGES = ["docker-stack", "helm-chart", "apache-airflow", "task-sdk"]

PACKAGES_METADATA_EXCLUDE_NAMES = ["docker-stack", "apache-airflow-providers"]

s3_client = boto3.client("s3")
cloudfront_client = boto3.client("cloudfront")
56 changes: 56 additions & 0 deletions dev/breeze/tests/test_docs_version_validation.py
@@ -0,0 +1,56 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

import os
from unittest.mock import patch

import pytest

from airflow_breeze.utils.docs_version_validation import error_versions, validate_docs_version


class TestValidateDocsVersion:
def setup_method(self):
os.environ["AIRFLOW_SITE_DIRECTORY"] = "/path/to/docs-archive"
error_versions.clear()

@patch("os.listdir")
@patch("os.path.join")
def test_validate_docs_version_with_invalid_versions(self, mock_path_join, mock_listdir):
mock_listdir.side_effect = [
["apache-airflow", "apache-airflow-providers-google"],
["1.10.0", "stable", "invalid_version"],
["2.0.0", "stable", "stable.txt"],
]
mock_path_join.side_effect = lambda *args: "/".join(args)

with pytest.raises(SystemExit):
validate_docs_version()
assert "Invalid version: 'invalid_version' found under doc folder apache-airflow" in error_versions

@patch("os.listdir")
@patch("os.path.join")
def test_validate_docs_version_with_valid_versions(self, mock_path_join, mock_listdir):
mock_listdir.side_effect = [
["apache-airflow", "apache-airflow-providers-standard"],
["1.10.0", "stable"],
["2.0.0", "stable", "stable.txt"],
]
mock_path_join.side_effect = lambda *args: "/".join(args)
validate_docs_version()
assert not error_versions, f"No errors should be found for valid versions, {error_versions}"
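For context on why the test's "invalid_version" folder trips the check (a standalone sketch, not part of the PR): get_all_versions() leans on packaging.version both for validation, since InvalidVersion is a subclass of ValueError, and for semantic ordering of the folders that parse.

from packaging.version import Version

folders = ["2.0.0", "1.10.0", "1.2.0", "invalid_version"]

valid = []
for name in folders:
    try:
        Version(name)
        valid.append(name)
    except ValueError as exc:  # packaging raises InvalidVersion, a ValueError subclass
        print(f"rejected {name!r}: {exc}")

# Semantic, not lexicographic, ordering: 1.10.0 sorts after 1.2.0.
print(sorted(valid, key=Version))  # ['1.2.0', '1.10.0', '2.0.0']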