Skip to content

Commit

Permalink
Validating provider documentation urls before displaying in views (#4…
Browse files Browse the repository at this point in the history
  • Loading branch information
amoghrajesh authored Jul 23, 2024
1 parent 5b28933 commit f1852c2
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 2 deletions.
12 changes: 10 additions & 2 deletions airflow/utils/docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,25 @@ def get_docs_url(page: str | None = None) -> str:
return result


def get_project_url_from_metadata(provider_name: str):
    """
    Look up the ``Project-URL`` entries of an installed distribution.

    Kept as a separate function so that callers (and tests) can replace the
    metadata lookup without touching the real installed packages.

    :param provider_name: name of the distribution whose metadata is read.
    :return: whatever ``get_all("Project-URL")`` yields for that distribution's
        metadata -- the values of every ``Project-URL`` header.
    """
    # The lookup error for an unknown distribution is intentionally not handled
    # here; it propagates to the caller.
    package_metadata = metadata.metadata(provider_name)
    return package_metadata.get_all("Project-URL")


def get_doc_url_for_provider(provider_name: str, provider_version: str) -> str:
"""Prepare link to Airflow Provider documentation."""
try:
metadata_items = metadata.metadata(provider_name).get_all("Project-URL")
from urllib.parse import urlparse

metadata_items = get_project_url_from_metadata(provider_name)
if isinstance(metadata_items, str):
metadata_items = [metadata_items]
if metadata_items:
for item in metadata_items:
if item.lower().startswith("documentation"):
_, _, url = item.partition(",")
if url:
parsed_url = urlparse(url)
if url and (parsed_url.scheme in ("http", "https") and bool(parsed_url.netloc)):
return url.strip()
except metadata.PackageNotFoundError:
pass
Expand Down
31 changes: 31 additions & 0 deletions tests/www/views/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
write_webserver_configuration_if_needed,
)
from airflow.plugins_manager import AirflowPlugin, EntryPointSource
from airflow.utils.docs import get_doc_url_for_provider
from airflow.utils.task_group import TaskGroup
from airflow.www.views import (
ProviderView,
Expand Down Expand Up @@ -180,6 +181,36 @@ def test__clean_description(admin_client, provider_description, expected):
assert actual == expected


@pytest.mark.parametrize(
    "provider_name, project_url, expected",
    [
        # Well-formed https documentation links are returned as-is (whitespace stripped).
        (
            "apache-airflow-providers-airbyte",
            "Documentation, https://airflow.apache.org/docs/apache-airflow-providers-airbyte/3.8.1/",
            "https://airflow.apache.org/docs/apache-airflow-providers-airbyte/3.8.1/",
        ),
        (
            "apache-airflow-providers-amazon",
            "Documentation, https://airflow.apache.org/docs/apache-airflow-providers-amazon/8.25.0/",
            "https://airflow.apache.org/docs/apache-airflow-providers-amazon/8.25.0/",
        ),
        # A non-http(s) scheme must be rejected, so the default URL built from
        # the provider name and version is expected instead.
        (
            "apache-airflow-providers-apache-druid",
            "Documentation, javascript:prompt(document.domain)",
            "https://airflow.apache.org/docs/apache-airflow-providers-apache-druid/1.0.0/",
        ),
    ],
)
@patch("airflow.utils.docs.get_project_url_from_metadata")
def test_get_doc_url_for_provider(
    mock_get_project_url_from_metadata, admin_client, provider_name, project_url, expected
):
    """Check that only valid documentation URLs from provider metadata are used."""
    # Feed the parametrized Project-URL entry through the patched metadata lookup.
    mock_get_project_url_from_metadata.return_value = [project_url]
    assert get_doc_url_for_provider(provider_name, "1.0.0") == expected


def test_endpoint_should_not_be_unauthenticated(app):
resp = app.test_client().get("/provider", follow_redirects=True)
check_content_not_in_response("Providers", resp)
Expand Down

0 comments on commit f1852c2

Please sign in to comment.