diff --git a/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py b/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py index 1eded07332772..c330047b7cd97 100644 --- a/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py +++ b/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py @@ -195,6 +195,49 @@ class PrepareReleaseDocsUserQuitException(Exception): } +def classification_result(changed_files): + if not changed_files: + return "other" + + has_docs = any(re.match(r"^providers/[^/]+/docs/", f) and f.endswith(".rst") for f in changed_files) + + has_test_or_example_only = all( + re.match(r"^providers/[^/]+/tests/", f) + or re.match(r"^providers/[^/]+/src/airflow/providers/[^/]+/example_dags/", f) + for f in changed_files + ) + + if has_docs: + return "documentation" + if has_test_or_example_only: + return "test_or_example_only" + return "other" + + +def classify_provider_pr_files(commit_hash: str) -> str: + """ + Classify a provider commit based on changed files. + + - Returns 'documentation' if any provider doc files are present. + - Returns 'test_or_example_only' if only test/example DAGs changed. + - Returns 'other' otherwise. + """ + try: + result = run_command( + ["git", "diff", "--name-only", f"{commit_hash}^", commit_hash], + cwd=AIRFLOW_ROOT_PATH, + capture_output=True, + text=True, + check=True, + ) + changed_files = result.stdout.strip().splitlines() + except subprocess.CalledProcessError: + # safe to return other here + return "other" + + return classification_result(changed_files) + + def _get_git_log_command( folder_paths: list[Path] | None = None, from_commit: str | None = None, to_commit: str | None = None ) -> list[str]: @@ -715,39 +758,6 @@ def _update_commits_rst( ) -def _is_test_or_example_dag_only_changes(commit_hash: str) -> bool: - """ - Check if a commit contains only test-related or example DAG changes - by using the git diff command. - - Considers files in airflow/providers/{provider}/tests/ - and airflow/providers/{provider}/src/airflow/providers/{provider}/example_dags/ - as test/example-only files. - - :param commit_hash: The full commit hash to check - :return: True if changes are only in test/example files, False otherwise - """ - try: - result = run_command( - ["git", "diff", "--name-only", f"{commit_hash}^", commit_hash], - cwd=AIRFLOW_ROOT_PATH, - capture_output=True, - text=True, - check=True, - ) - changed_files = result.stdout.strip().splitlines() - - for file_path in changed_files: - if not ( - re.match(r"providers/[^/]+/tests/", file_path) - or re.match(r"providers/[^/]+/src/airflow/providers/[^/]+/example_dags/", file_path) - ): - return False - return True - except subprocess.CalledProcessError: - return False - - def update_release_notes( provider_id: str, reapply_templates_only: bool, @@ -817,9 +827,16 @@ def update_release_notes( ) change = list_of_list_of_changes[0][table_iter] - if change.pr and _is_test_or_example_dag_only_changes(change.full_hash): + classification = classify_provider_pr_files(change.full_hash) + if classification == "documentation": + get_console().print( + f"[green]Automatically classifying change as DOCUMENTATION since it contains only doc changes:[/]\n" + f"[blue]{formatted_message}[/]" + ) + type_of_change = TypeOfChange.DOCUMENTATION + elif classification == "test_or_example_only": get_console().print( - f"[green]Automatically classifying change as SKIPPED since it only contains test changes:[/]\n" + f"[green]Automatically classifying change as SKIPPED since it only contains test/example changes:[/]\n" f"[blue]{formatted_message}[/]" ) type_of_change = TypeOfChange.SKIP diff --git a/dev/breeze/tests/test_provider_documentation.py b/dev/breeze/tests/test_provider_documentation.py index 43a058c998112..b55088987c632 100644 --- a/dev/breeze/tests/test_provider_documentation.py +++ b/dev/breeze/tests/test_provider_documentation.py @@ -33,6 +33,7 @@ _get_change_from_line, _get_changes_classified, _get_git_log_command, + classification_result, get_most_impactful_change, get_version_tag, ) @@ -391,3 +392,54 @@ def test_version_bump_for_provider_documentation(initial_version, bump_index, ex ) def test_get_most_impactful_change(changes, expected): assert get_most_impactful_change(changes) == expected + + +@pytest.mark.parametrize( + "changed_files, expected", + [ + pytest.param(["providers/slack/docs/slack.rst"], "documentation", id="only_docs"), + pytest.param(["providers/slack/tests/test_slack.py"], "test_or_example_only", id="only_tests"), + pytest.param( + ["providers/slack/src/airflow/providers/slack/example_dags/example_notify.py"], + "test_or_example_only", + id="only_example_dags", + ), + pytest.param( + [ + "providers/slack/tests/test_slack.py", + "providers/slack/src/airflow/providers/slack/example_dags/example_notify.py", + ], + "test_or_example_only", + id="tests_and_example_dags", + ), + pytest.param( + [ + "providers/slack/tests/test_slack.py", + "providers/slack/docs/slack.rst", + ], + "documentation", + id="docs_and_tests", + ), + pytest.param( + [ + "providers/slack/src/airflow/providers/slack/hooks/slack.py", + "providers/slack/docs/slack.rst", + ], + "documentation", + id="docs_and_real_code", + ), + pytest.param( + [ + "providers/slack/src/airflow/providers/slack/hooks/slack.py", + "providers/slack/tests/test_slack.py", + ], + "other", + id="real_code_and_tests", + ), + pytest.param(["airflow/utils/db.py"], "other", id="non_provider_file"), + pytest.param([], "other", id="empty_commit"), + ], +) +def test_classify_provider_pr_files_logic(changed_files, expected): + result = classification_result(changed_files) + assert result == expected