Skip to content

Commit

Permalink
CAT: Validate connector documentation (#34380)
Browse files Browse the repository at this point in the history
  • Loading branch information
darynaishchenko authored Feb 8, 2024
1 parent 2a323cc commit 8107081
Show file tree
Hide file tree
Showing 20 changed files with 1,616 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## 3.4.0
Add TestConnectorDocumentation suite for validating connectors documentation structure and content.

## 3.3.3
Аix `NoAdditionalPropertiesValidator` if no type found in `items`

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,11 @@ class ConnectorAttributesConfig(BaseConfig):
)


class TestConnectorDocumentationConfig(BaseConfig):
timeout_seconds: int = timeout_seconds
config_path: str = config_path


class GenericTestConfig(GenericModel, Generic[TestConfigT]):
bypass_reason: Optional[str]
tests: Optional[List[TestConfigT]]
Expand All @@ -278,6 +283,7 @@ class AcceptanceTestConfigurations(BaseConfig):
full_refresh: Optional[GenericTestConfig[FullRefreshConfig]]
incremental: Optional[GenericTestConfig[IncrementalConfig]]
connector_attributes: Optional[GenericTestConfig[ConnectorAttributesConfig]]
connector_documentation: Optional[GenericTestConfig[TestConnectorDocumentationConfig]]


class Config(BaseConfig):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -395,3 +395,16 @@ def pytest_sessionfinish(session, exitstatus):
@pytest.fixture(name="connector_metadata")
def connector_metadata_fixture(base_path) -> dict:
return load_yaml_or_json_path(base_path / "metadata.yaml")


@pytest.fixture(name="docs_path")
def docs_path_fixture(base_path, connector_metadata) -> Path:
path_to_docs = connector_metadata["data"]["documentationUrl"].replace("https://docs.airbyte.com", "docs") + ".md"
airbyte_path = Path(base_path).parents[6]
return airbyte_path / path_to_docs


@pytest.fixture(name="connector_documentation")
def connector_documentation_fixture(docs_path: str) -> str:
with open(docs_path, "r") as f:
return f.read().rstrip()
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
#

from .test_core import TestBasicRead, TestConnection, TestConnectorAttributes, TestDiscovery, TestSpec
from .test_core import TestBasicRead, TestConnection, TestConnectorAttributes, TestDiscovery, TestSpec, TestConnectorDocumentation
from .test_full_refresh import TestFullRefresh
from .test_incremental import TestIncremental

__all__ = ["TestSpec", "TestBasicRead", "TestConnection", "TestConnectorAttributes", "TestDiscovery", "TestFullRefresh", "TestIncremental"]
__all__ = ["TestSpec", "TestBasicRead", "TestConnection", "TestConnectorAttributes", "TestDiscovery", "TestFullRefresh", "TestIncremental", "TestConnectorDocumentation"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account.
2. Click Sources and then click + New source/destination.
3. On the Set up the source page, select {connector_name} from the Source type dropdown.
4. Enter a name for the {connector_name} connector.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@

1. Navigate to the Airbyte Open Source dashboard.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

<HideInUI>

This page contains the setup guide and reference information for the [{connector_name}]({docs_link}) source connector.

</HideInUI>
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@

The {connector_name} source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts/#connection-sync-modes):
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@

Now that you have set up the {connector_name} source connector, check out the following {connector_name} tutorials:
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,16 @@
from functools import reduce
from logging import Logger
from os.path import splitext
from pathlib import Path
from threading import Thread
from typing import Any, Dict, List, Mapping, MutableMapping, Optional, Set, Tuple
from xmlrpc.client import Boolean

import connector_acceptance_test.utils.docs as docs_utils
import dpath.util
import jsonschema
import pytest
import requests
from airbyte_protocol.models import (
AirbyteRecordMessage,
AirbyteStream,
Expand Down Expand Up @@ -1335,3 +1339,192 @@ async def test_certified_connector_has_suggested_streams(
assert (
has_assigned_suggested_streams
), f"The `streams` empty list is not allowed for `metadata.data.suggestedStreams` for certified connectors."


class TestConnectorDocumentation(BaseTest):
MANDATORY_FOR_TEST_STRICTNESS_LEVELS = [] # Used so that this is not part of the mandatory high strictness test suite yet

PREREQUISITES = "Prerequisites"
HEADING = "heading"
CREDENTIALS_KEYWORDS = ["account", "auth", "credentials", "access"]
CONNECTOR_SPECIFIC_HEADINGS = "<Connector-specific features>"

@pytest.fixture(name="operational_certification_test")
async def operational_certification_test_fixture(self, connector_metadata: dict) -> bool:
"""
Fixture that is used to skip a test that is reserved only for connectors that are supposed to be tested
against operational certification criteria
"""
if connector_metadata.get("data", {}).get("ab_internal", {}).get("ql") < 400:
pytest.skip("Skipping testing source connector documentation due to low ql.")
return True

def _get_template_headings(self, connector_name: str) -> tuple[tuple[str], tuple[str]]:
"""
https://hackmd.io/Bz75cgATSbm7DjrAqgl4rw - standard template
Headings in order to docs structure.
"""
all_headings = (
connector_name,
"Prerequisites",
"Setup guide",
f"Set up {connector_name}",
"For Airbyte Cloud:",
"For Airbyte Open Source:",
f"Set up the {connector_name} connector in Airbyte",
"For Airbyte Cloud:",
"For Airbyte Open Source:",
"Supported sync modes",
"Supported Streams",
self.CONNECTOR_SPECIFIC_HEADINGS,
"Performance considerations",
"Data type map",
"Troubleshooting",
"Tutorials",
"Changelog",
)
not_required_heading = (
f"Set up the {connector_name} connector in Airbyte",
"For Airbyte Cloud:",
"For Airbyte Open Source:",
self.CONNECTOR_SPECIFIC_HEADINGS,
"Performance considerations",
"Data type map",
"Troubleshooting",
"Tutorials",
)
return all_headings, not_required_heading

def _headings_description(self, connector_name: str) -> dict[str:Path]:
"""
Headings with path to file with template description
"""
descriptions_paths = {
connector_name: Path(__file__).parent / "doc_templates/source.txt",
"For Airbyte Cloud:": Path(__file__).parent / "doc_templates/for_airbyte_cloud.txt",
"For Airbyte Open Source:": Path(__file__).parent / "doc_templates/for_airbyte_open_source.txt",
"Supported sync modes": Path(__file__).parent / "doc_templates/supported_sync_modes.txt",
"Tutorials": Path(__file__).parent / "doc_templates/tutorials.txt",
}
return descriptions_paths

def test_prerequisites_content(
self, operational_certification_test, actual_connector_spec: ConnectorSpecification, connector_documentation: str, docs_path: str
):
node = docs_utils.documentation_node(connector_documentation)
header_line_map = {docs_utils.header_name(n): n.map[1] for n in node if n.type == self.HEADING}
headings = tuple(header_line_map.keys())

if not header_line_map.get(self.PREREQUISITES):
pytest.fail(f"Documentation does not have {self.PREREQUISITES} section.")

prereq_start_line = header_line_map[self.PREREQUISITES]
prereq_end_line = docs_utils.description_end_line_index(self.PREREQUISITES, headings, header_line_map)

with open(docs_path, "r") as docs_file:
prereq_content_lines = docs_file.readlines()[prereq_start_line:prereq_end_line]
# adding real character to avoid accidentally joining lines into a wanted title.
prereq_content = "|".join(prereq_content_lines).lower()
required_titles, has_credentials = docs_utils.required_titles_from_spec(actual_connector_spec.connectionSpecification)

for title in required_titles:
assert title in prereq_content, (
f"Required '{title}' field is not in {self.PREREQUISITES} section " f"or title in spec doesn't match name in the docs."
)

if has_credentials:
# credentials has specific check for keywords as we have a lot of way how to describe this step
credentials_validation = [k in prereq_content for k in self.CREDENTIALS_KEYWORDS]
assert True in credentials_validation, f"Required 'credentials' field is not in {self.PREREQUISITES} section."

def test_docs_structure(self, operational_certification_test, connector_documentation: str, connector_metadata: dict):
"""
test_docs_structure gets all top-level headers from source documentation file and check that the order is correct.
The order of the headers should follow our standard template https://hackmd.io/Bz75cgATSbm7DjrAqgl4rw.
_get_template_headings returns tuple of headers as in standard template and non-required headers that might nor be in the source docs.
CONNECTOR_SPECIFIC_HEADINGS value in list of required headers that shows a place where should be a connector specific headers,
which can be skipped as out of standard template and depend of connector.
"""

heading_names = docs_utils.prepare_headers(connector_documentation)
template_headings, non_required_heading = self._get_template_headings(connector_metadata["data"]["name"])

heading_names_len, template_headings_len = len(heading_names), len(template_headings)
heading_names_index, template_headings_index = 0, 0

while heading_names_index < heading_names_len and template_headings_index < template_headings_len:
heading_names_value = heading_names[heading_names_index]
template_headings_value = template_headings[template_headings_index]
# check that template header is specific for connector and actual header should not be validated
if template_headings_value == self.CONNECTOR_SPECIFIC_HEADINGS:
# check that actual header is not in required headers, as required headers should be on a right place and order
if heading_names_value not in template_headings:
heading_names_index += 1 # go to the next actual header as CONNECTOR_SPECIFIC_HEADINGS can be more than one
continue
else:
# if actual header is required go to the next template header to validate actual header order
template_headings_index += 1
continue
# strict check that actual header equals template header
if heading_names_value == template_headings_value:
# found expected header, go to the next header in template and actual headers
heading_names_index += 1
template_headings_index += 1
continue
# actual header != template header means that template value is not required and can be skipped
if template_headings_value in non_required_heading:
# found non-required header, go to the next template header to validate actual header
template_headings_index += 1
continue
# any check is True, indexes didn't move to the next step
pytest.fail(docs_utils.reason_titles_not_match(heading_names_value, template_headings_value, template_headings))
# indexes didn't move to the last required one, so some headers are missed
if template_headings_index != template_headings_len:
pytest.fail(docs_utils.reason_missing_titles(template_headings_index, template_headings))

def test_docs_descriptions(
self, operational_certification_test, docs_path: str, connector_documentation: str, connector_metadata: dict
):
connector_name = connector_metadata["data"]["name"]
template_descriptions = self._headings_description(connector_name)

node = docs_utils.documentation_node(connector_documentation)
header_line_map = {docs_utils.header_name(n): n.map[1] for n in node if n.type == self.HEADING}
actual_headings = tuple(header_line_map.keys())

for heading, description in template_descriptions.items():
if heading in actual_headings:

description_start_line = header_line_map[heading]
description_end_line = docs_utils.description_end_line_index(heading, actual_headings, header_line_map)

with open(docs_path, "r") as docs_file, open(description, "r") as template_file:

docs_description_content = docs_file.readlines()[description_start_line:description_end_line]
template_description_content = template_file.readlines()

for d, t in zip(docs_description_content, template_description_content):
d, t = docs_utils.prepare_lines_to_compare(connector_name, d, t)
assert d == t, f"Description for '{heading}' does not follow structure.\nExpected: {t} Actual: {d}"

def test_validate_links(self, operational_certification_test, connector_documentation: str):
valid_status_codes = [200, 403, 401, 405] # we skip 4xx due to needed access
links = re.findall("(https?://[^\s)]+)", connector_documentation)
invalid_links = []
threads = []

def validate_docs_links(docs_link):
response = requests.get(docs_link)
if response.status_code not in valid_status_codes:
invalid_links.append(docs_link)

for link in links:
process = Thread(target=validate_docs_links, args=[link])
process.start()
threads.append(process)

for process in threads:
process.join(timeout=30) # 30s timeout for process else link will be skipped
process.is_alive()

assert not invalid_links, f"{len(invalid_links)} invalid links were found in the connector documentation: {invalid_links}."
Loading

0 comments on commit 8107081

Please sign in to comment.