diff --git a/airbyte-integrations/bases/connector-acceptance-test/CHANGELOG.md b/airbyte-integrations/bases/connector-acceptance-test/CHANGELOG.md index b7e87e84ed86..a41617cf130f 100644 --- a/airbyte-integrations/bases/connector-acceptance-test/CHANGELOG.md +++ b/airbyte-integrations/bases/connector-acceptance-test/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 3.4.0 +Add TestConnectorDocumentation suite for validating connectors documentation structure and content. + ## 3.3.3 Fix `NoAdditionalPropertiesValidator` if no type found in `items` diff --git a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/config.py b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/config.py index f269fe15db0a..fbb823185b52 100644 --- a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/config.py +++ b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/config.py @@ -259,6 +259,11 @@ class ConnectorAttributesConfig(BaseConfig): ) +class TestConnectorDocumentationConfig(BaseConfig): + timeout_seconds: int = timeout_seconds + config_path: str = config_path + + class GenericTestConfig(GenericModel, Generic[TestConfigT]): bypass_reason: Optional[str] tests: Optional[List[TestConfigT]] @@ -278,6 +283,7 @@ class AcceptanceTestConfigurations(BaseConfig): full_refresh: Optional[GenericTestConfig[FullRefreshConfig]] incremental: Optional[GenericTestConfig[IncrementalConfig]] connector_attributes: Optional[GenericTestConfig[ConnectorAttributesConfig]] + connector_documentation: Optional[GenericTestConfig[TestConnectorDocumentationConfig]] class Config(BaseConfig): diff --git a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/conftest.py b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/conftest.py index f0df880650fa..dd3f6b5701c5 100644 --- 
@pytest.fixture(name="connector_metadata")
def connector_metadata_fixture(base_path) -> dict:
    """Load the connector's ``metadata.yaml`` located directly under ``base_path``."""
    return load_yaml_or_json_path(base_path / "metadata.yaml")


@pytest.fixture(name="docs_path")
def docs_path_fixture(base_path, connector_metadata) -> Path:
    """Return the path of the connector's Markdown documentation file.

    The path is derived from ``data.documentationUrl`` in the connector metadata:
    the ``https://docs.airbyte.com`` prefix is replaced with ``docs`` and ``.md`` is appended.
    """
    documentation_url = connector_metadata["data"]["documentationUrl"]
    path_to_docs = documentation_url.replace("https://docs.airbyte.com", "docs") + ".md"
    # NOTE(review): parents[6] presumably climbs from the connector directory to the repository root - confirm.
    airbyte_path = Path(base_path).parents[6]
    return airbyte_path / path_to_docs


@pytest.fixture(name="connector_documentation")
def connector_documentation_fixture(docs_path: Path) -> str:
    """Return the documentation file content with trailing whitespace removed.

    The file is decoded explicitly as UTF-8 so the result does not depend on the platform locale.
    """
    with open(docs_path, "r", encoding="utf-8") as docs_file:
        return docs_file.read().rstrip()
# -from .test_core import TestBasicRead, TestConnection, TestConnectorAttributes, TestDiscovery, TestSpec +from .test_core import TestBasicRead, TestConnection, TestConnectorAttributes, TestDiscovery, TestSpec, TestConnectorDocumentation from .test_full_refresh import TestFullRefresh from .test_incremental import TestIncremental -__all__ = ["TestSpec", "TestBasicRead", "TestConnection", "TestConnectorAttributes", "TestDiscovery", "TestFullRefresh", "TestIncremental"] +__all__ = ["TestSpec", "TestBasicRead", "TestConnection", "TestConnectorAttributes", "TestDiscovery", "TestFullRefresh", "TestIncremental", "TestConnectorDocumentation"] diff --git a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/for_airbyte_cloud.txt b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/for_airbyte_cloud.txt new file mode 100644 index 000000000000..54946b70acdc --- /dev/null +++ b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/for_airbyte_cloud.txt @@ -0,0 +1,5 @@ + +1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account. +2. Click Sources and then click + New source/destination. +3. On the Set up the source page, select {connector_name} from the Source type dropdown. +4. Enter a name for the {connector_name} connector. diff --git a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/for_airbyte_open_source.txt b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/for_airbyte_open_source.txt new file mode 100644 index 000000000000..c5249d21023a --- /dev/null +++ b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/for_airbyte_open_source.txt @@ -0,0 +1,2 @@ + +1. Navigate to the Airbyte Open Source dashboard. 
diff --git a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/source.txt b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/source.txt new file mode 100644 index 000000000000..d7b36f402209 --- /dev/null +++ b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/source.txt @@ -0,0 +1,6 @@ + + + +This page contains the setup guide and reference information for the [{connector_name}]({docs_link}) source connector. + + diff --git a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/supported_sync_modes.txt b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/supported_sync_modes.txt new file mode 100644 index 000000000000..51e8f806ff65 --- /dev/null +++ b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/supported_sync_modes.txt @@ -0,0 +1,2 @@ + +The {connector_name} source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts/#connection-sync-modes): diff --git a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/tutorials.txt b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/tutorials.txt new file mode 100644 index 000000000000..584c04f8daf5 --- /dev/null +++ b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/tutorials.txt @@ -0,0 +1,2 @@ + +Now that you have set up the {connector_name} source connector, check out the following {connector_name} tutorials: diff --git a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/test_core.py b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/test_core.py index dab1ba148b9c..a91617944a79 
class TestConnectorDocumentation(BaseTest):
    """Validate a connector's documentation page against the standard documentation template.

    The suite checks that:
      * the Prerequisites section mentions every required field of the connector spec,
      * the headings follow the order of the standard template,
      * the descriptions under templated headings match the template text,
      * the links found in the documentation answer with an accepted HTTP status.
    """

    MANDATORY_FOR_TEST_STRICTNESS_LEVELS = []  # Used so that this is not part of the mandatory high strictness test suite yet

    PREREQUISITES = "Prerequisites"
    HEADING = "heading"
    CREDENTIALS_KEYWORDS = ["account", "auth", "credentials", "access"]
    CONNECTOR_SPECIFIC_HEADINGS = ""
    # Connectors whose `ab_internal.ql` is below this value are skipped.
    MINIMUM_QUALITY_LEVEL = 400
    # Per-request timeout so a non-responding host cannot keep a worker thread alive forever.
    LINK_REQUEST_TIMEOUT_SECONDS = 30

    @pytest.fixture(name="operational_certification_test")
    async def operational_certification_test_fixture(self, connector_metadata: dict) -> bool:
        """
        Fixture that is used to skip a test that is reserved only for connectors that are supposed to be tested
        against operational certification criteria
        """
        quality_level = connector_metadata.get("data", {}).get("ab_internal", {}).get("ql")
        # A missing `ql` used to fail with TypeError on `None < 400`; it is now treated like a low quality level.
        if quality_level is None or quality_level < self.MINIMUM_QUALITY_LEVEL:
            pytest.skip("Skipping testing source connector documentation due to low ql.")
        return True

    def _get_template_headings(self, connector_name: str) -> tuple[tuple[str, ...], tuple[str, ...]]:
        """
        https://hackmd.io/Bz75cgATSbm7DjrAqgl4rw - standard template
        Headings in order to docs structure.

        :param connector_name: connector name, interpolated into the connector-specific template headings
        :return: (all template headings in order, headings that may be absent from the docs)
        """
        all_headings = (
            connector_name,
            "Prerequisites",
            "Setup guide",
            f"Set up {connector_name}",
            "For Airbyte Cloud:",
            "For Airbyte Open Source:",
            f"Set up the {connector_name} connector in Airbyte",
            "For Airbyte Cloud:",
            "For Airbyte Open Source:",
            "Supported sync modes",
            "Supported Streams",
            self.CONNECTOR_SPECIFIC_HEADINGS,
            "Performance considerations",
            "Data type map",
            "Troubleshooting",
            "Tutorials",
            "Changelog",
        )
        not_required_heading = (
            f"Set up the {connector_name} connector in Airbyte",
            "For Airbyte Cloud:",
            "For Airbyte Open Source:",
            self.CONNECTOR_SPECIFIC_HEADINGS,
            "Performance considerations",
            "Data type map",
            "Troubleshooting",
            "Tutorials",
        )
        return all_headings, not_required_heading

    def _headings_description(self, connector_name: str) -> dict[str, Path]:
        """
        Headings with path to file with template description
        """
        templates_dir = Path(__file__).parent / "doc_templates"
        return {
            connector_name: templates_dir / "source.txt",
            "For Airbyte Cloud:": templates_dir / "for_airbyte_cloud.txt",
            "For Airbyte Open Source:": templates_dir / "for_airbyte_open_source.txt",
            "Supported sync modes": templates_dir / "supported_sync_modes.txt",
            "Tutorials": templates_dir / "tutorials.txt",
        }

    def _header_line_map(self, connector_documentation: str) -> dict[str, int]:
        """Map every heading name to the `map[1]` line index of its heading node (the line right after the heading)."""
        node = docs_utils.documentation_node(connector_documentation)
        return {docs_utils.header_name(n): n.map[1] for n in node if n.type == self.HEADING}

    def test_prerequisites_content(
        self, operational_certification_test, actual_connector_spec: ConnectorSpecification, connector_documentation: str, docs_path: str
    ):
        """Check that the Prerequisites section names every required spec field and, if needed, the credentials."""
        header_line_map = self._header_line_map(connector_documentation)
        headings = tuple(header_line_map.keys())

        if self.PREREQUISITES not in header_line_map:
            pytest.fail(f"Documentation does not have {self.PREREQUISITES} section.")

        prereq_start_line = header_line_map[self.PREREQUISITES]
        prereq_end_line = docs_utils.description_end_line_index(self.PREREQUISITES, headings, header_line_map)

        with open(docs_path, "r", encoding="utf-8") as docs_file:
            prereq_content_lines = docs_file.readlines()[prereq_start_line:prereq_end_line]

        # adding real character to avoid accidentally joining lines into a wanted title.
        prereq_content = "|".join(prereq_content_lines).lower()
        required_titles, has_credentials = docs_utils.required_titles_from_spec(actual_connector_spec.connectionSpecification)

        for title in required_titles:
            assert title in prereq_content, (
                f"Required '{title}' field is not in {self.PREREQUISITES} section " f"or title in spec doesn't match name in the docs."
            )

        if has_credentials:
            # credentials has specific check for keywords as we have a lot of way how to describe this step
            assert any(
                keyword in prereq_content for keyword in self.CREDENTIALS_KEYWORDS
            ), f"Required 'credentials' field is not in {self.PREREQUISITES} section."

    def test_docs_structure(self, operational_certification_test, connector_documentation: str, connector_metadata: dict):
        """
        test_docs_structure gets all top-level headers from source documentation file and check that the order is correct.
        The order of the headers should follow our standard template https://hackmd.io/Bz75cgATSbm7DjrAqgl4rw.
        _get_template_headings returns tuple of headers as in standard template and non-required headers that might not be in the source docs.
        CONNECTOR_SPECIFIC_HEADINGS value in list of required headers that shows a place where should be a connector specific headers,
        which can be skipped as out of standard template and depend of connector.
        """

        heading_names = docs_utils.prepare_headers(connector_documentation)
        template_headings, non_required_heading = self._get_template_headings(connector_metadata["data"]["name"])

        heading_names_len, template_headings_len = len(heading_names), len(template_headings)
        heading_names_index, template_headings_index = 0, 0

        while heading_names_index < heading_names_len and template_headings_index < template_headings_len:
            heading_names_value = heading_names[heading_names_index]
            template_headings_value = template_headings[template_headings_index]
            # check that template header is specific for connector and actual header should not be validated
            if template_headings_value == self.CONNECTOR_SPECIFIC_HEADINGS:
                if heading_names_value not in template_headings:
                    # actual header is connector specific; there can be more than one, so only the actual index moves
                    heading_names_index += 1
                else:
                    # actual header is a template header: leave the placeholder and validate its order next
                    template_headings_index += 1
                continue
            # strict check that actual header equals template header
            if heading_names_value == template_headings_value:
                heading_names_index += 1
                template_headings_index += 1
                continue
            # actual header != template header means that template value is not required and can be skipped
            if template_headings_value in non_required_heading:
                template_headings_index += 1
                continue
            # none of the checks matched, so the actual header is misplaced, misspelled or unexpected
            pytest.fail(docs_utils.reason_titles_not_match(heading_names_value, template_headings_value, template_headings))
        # indexes didn't move to the last required one, so some headers are missed
        if template_headings_index != template_headings_len:
            pytest.fail(docs_utils.reason_missing_titles(template_headings_index, template_headings))

    def test_docs_descriptions(
        self, operational_certification_test, docs_path: str, connector_documentation: str, connector_metadata: dict
    ):
        """Compare the text under each templated heading with the matching template file, line by line."""
        connector_name = connector_metadata["data"]["name"]
        template_descriptions = self._headings_description(connector_name)

        header_line_map = self._header_line_map(connector_documentation)
        actual_headings = tuple(header_line_map.keys())

        # The documentation file does not change between headings, so it is read only once.
        with open(docs_path, "r", encoding="utf-8") as docs_file:
            docs_lines = docs_file.readlines()

        for heading, description in template_descriptions.items():
            if heading not in header_line_map:
                continue

            description_start_line = header_line_map[heading]
            description_end_line = docs_utils.description_end_line_index(heading, actual_headings, header_line_map)

            with open(description, "r", encoding="utf-8") as template_file:
                template_description_content = template_file.readlines()

            docs_description_content = docs_lines[description_start_line:description_end_line]
            # zip stops at the shorter sequence: only the lines covered by the template are compared
            for d, t in zip(docs_description_content, template_description_content):
                d, t = docs_utils.prepare_lines_to_compare(connector_name, d, t)
                assert d == t, f"Description for '{heading}' does not follow structure.\nExpected: {t} Actual: {d}"

    def test_validate_links(self, operational_certification_test, connector_documentation: str):
        """Request every http(s) link found in the documentation and fail on unexpected status codes."""
        valid_status_codes = [200, 403, 401, 405]  # we skip 4xx due to needed access
        links = re.findall(r"(https?://[^\s)]+)", connector_documentation)
        invalid_links = []
        threads = []

        def validate_docs_links(docs_link):
            try:
                response = requests.get(docs_link, timeout=self.LINK_REQUEST_TIMEOUT_SECONDS)
            except requests.exceptions.RequestException:
                # Unreachable or timed-out links are skipped, not reported: previously the exception
                # simply killed the worker thread, which had the same outcome but printed a traceback.
                return
            if response.status_code not in valid_status_codes:
                invalid_links.append(docs_link)

        for link in links:
            # daemon threads cannot keep the interpreter alive if a request outlives the join timeout
            process = Thread(target=validate_docs_links, args=[link], daemon=True)
            process.start()
            threads.append(process)

        for process in threads:
            process.join(timeout=30)  # 30s timeout for process else link will be skipped

        assert not invalid_links, f"{len(invalid_links)} invalid links were found in the connector documentation: {invalid_links}."
def remove_step_from_heading(heading: str) -> str:
    """Strip the ``Step 1: `` or ``Step 2: `` marker from a heading.

    Only the first marker found (checked in that order) is removed; headings without
    a marker are returned unchanged.
    """
    for step_marker in ("Step 1: ", "Step 2: "):
        if step_marker in heading:
            return heading.replace(step_marker, "")
    return heading


def required_titles_from_spec(spec: dict[str, Any]) -> tuple[list[str], bool]:
    """Collect the lower-cased titles of the required spec fields that the docs must mention.

    :param spec: connection specification with ``properties`` and, optionally, ``required``
    :return: ``(titles, has_credentials)``. ``has_credentials`` is True when ``credentials``,
        ``client_id`` or ``client_secret`` is required, or when ``client_secret`` is a declared property.
        In that case all credential-like fields are left out of ``titles`` because credentials are
        validated separately by keyword.
    :raises KeyError: if a remaining required field is not declared in ``properties`` or has no ``title``
    """
    credential_fields = ("credentials", "client_id", "client_secret", "access_token", "refresh_token")
    required_fields = spec.get("required", [])
    properties = spec.get("properties", {})

    has_credentials = (
        "credentials" in required_fields
        or "client_id" in required_fields
        or "client_secret" in required_fields
        or "client_secret" in properties
    )

    # Build a filtered copy: the caller's spec is typically a shared object and must not be mutated.
    if has_credentials:
        fields_to_document = [field for field in required_fields if field not in credential_fields]
    else:
        fields_to_document = list(required_fields)

    titles = [properties[field]["title"].lower() for field in fields_to_document]
    return titles, has_credentials
def prepare_lines_to_compare(connector_name: str, docs_line: str, template_line: str) -> tuple[str, str]:
    """Normalize a documentation line and a template line so they can be compared with ``==``.

    ``{connector_name}`` in the template is replaced with the real connector name. When the
    template contains the ``({docs_link})`` placeholder, the target of the first Markdown link
    in the documentation line is replaced with that placeholder, because the real URL differs
    per connector.

    :return: ``(docs_line, template_line)`` after normalization
    """
    connector_name_placeholder = "{connector_name}"
    link_placeholder = "({docs_link})"

    template_line = template_line.replace(connector_name_placeholder, connector_name)
    if link_placeholder in template_line:
        # Match the `](...)` part of a Markdown link instead of the first "(" on the line,
        # so connector names that themselves contain parentheses are left intact.
        # A documentation line without a link is returned unchanged.
        docs_line = re.sub(r"\]\([^)]*\)", lambda _match: "]" + link_placeholder, docs_line, count=1)

    return docs_line, template_line


def remove_not_required_step_headers(headers: list[str]) -> list[str]:
    """
    Removes headers like Step 1.1 Step 3 Step 2.3 from actual headers, if they placed after Step 1: header.
    from: "Connector name", "Prerequisites", "Setup guide", "Step 1: do something 1", "Step 1.11: do something 11",
    "Step 2: do something 2", "Step 2.1: do something 2.1", "Changelog"
    To: "Connector name", "Prerequisites", "Setup guide", "Step 1: do something 1", "Step 2: do something 2", "Changelog"
    This is connector specific headers, so we can ignore them.

    Any sequence of headers is accepted; a new list is always returned and the input is left untouched.
    """
    headers = list(headers)

    step_one_index = None
    for header in headers:
        if "Step 1: " in header:
            step_one_index = headers.index(header)
    if step_one_index is None:  # docs doesn't have Step 1 headers (index 0 is a valid position)
        return headers

    step_headers = headers[step_one_index:]
    step_pattern = re.compile(r"Step \d+\.?\d*: ")
    step = "Step 1: "
    i = 0
    # Walk from the "Step 1: " header; stop as soon as the current header is not the expected step
    # or the next header is not a step header at all.
    while i < len(step_headers) and step in step_headers[i]:
        if i + 1 >= len(step_headers) or not step_pattern.match(step_headers[i + 1]):
            break
        if "Step 2: " in step_headers[i + 1]:
            # found Step 2, it's required header, move to it and keep dropping its sub-steps
            step = "Step 2: "
            i += 1
        else:
            # delete by position: removing by value could hit an earlier header with the same text
            del step_headers[i + 1]

    return headers[:step_one_index] + step_headers


def reason_titles_not_match(heading_names_value: str, template_headings_value: str, template_headings: list[str]) -> str:
    """Build the failure message for a heading that does not match the expected template heading.

    When the actual heading closely resembles template headings (and is not itself one of them),
    the close matches are included as a hint.
    """
    reason = (
        f"Documentation structure doesn't follow standard template. Heading '{heading_names_value}' is not in the right place, "
        f"the name of heading is incorrect or the heading name is not expected.\n"
    )
    close_titles = get_close_matches(heading_names_value, template_headings)
    if close_titles and close_titles[0] != heading_names_value:
        diff = f"Diff:\nActual Heading: '{heading_names_value}'. Possible correct heading: '{close_titles}'. Expected Heading: '{template_headings_value}'."
    else:
        diff = f"Diff:\nActual Heading: '{heading_names_value}'. Expected Heading: '{template_headings_value}'"
    return reason + diff


def reason_missing_titles(template_headings_index: int, template_headings: list[str]) -> str:
    """Build the failure message listing every template heading from ``template_headings_index`` onwards."""
    return (
        f"Documentation structure doesn't follow standard template. docs is not full."
        f"\nMissing headers: {template_headings[template_headings_index:]}"
    )
def description_end_line_index(heading: str, actual_headings: list[str], header_line_map: dict[str, int]) -> "int | None":
    """Return the slice end index for the description that follows ``heading``.

    :param heading: heading whose description is being extracted
    :param actual_headings: headings of the document, in order
    :param header_line_map: line index recorded for every heading
    :return: the line index recorded for the next heading, or ``None`` when ``heading`` is the last one,
        so that ``lines[start:end]`` runs to the end of the document (``-1`` would drop the final line)
    :raises ValueError: if ``heading`` is not in ``actual_headings``
    """
    next_heading_position = actual_headings.index(heading) + 1
    if next_heading_position == len(actual_headings):
        return None
    return header_line_map[actual_headings[next_heading_position]]


def prepare_headers(connector_documentation: str) -> tuple[str, ...]:
    """Return the documentation headings prepared for comparison with the standard template.

    Sub-step headings (Step 1.1, Step 3, ...) are dropped and the ``Step 1: `` / ``Step 2: ``
    markers are removed from the remaining heading names.
    """
    node = documentation_node(connector_documentation)
    headers = [header_name(n) for n in node if n.type == "heading"]  # find all headers
    headers = remove_not_required_step_headers(headers)  # remove Step 1.1 Step 3 ... headers
    return tuple(remove_step_from_heading(h) for h in headers)  # remove Step 1 and Step 2 from header name
+ + + +## Prerequisites + +- List of GitHub Repositories (and access for them in case they are private) + + +**For Airbyte Cloud:** + +- OAuth +- Personal Access Token (see [Permissions and scopes](https://docs.airbyte.com/integrations/sources/github#permissions-and-scopes)) + + + +**For Airbyte Open Source:** + +- Personal Access Token (see [Permissions and scopes](https://docs.airbyte.com/integrations/sources/github#permissions-and-scopes)) + + +## Setup guide + +### Step 1: Set up GitHub + +Create a [GitHub Account](https://github.com). + + +**Airbyte Open Source additional setup steps** + +Log into [GitHub](https://github.com) and then generate a [personal access token](https://github.com/settings/tokens). To load balance your API quota consumption across multiple API tokens, input multiple tokens separated with `,`. + + +### Step 2: Set up the GitHub connector in Airbyte + + +**For Airbyte Cloud:** + +1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account. +2. In the left navigation bar, click **Sources**. +3. On the source selection page, select **GitHub** from the list of Sources. +4. Add a name for your GitHub connector. +5. To authenticate: + + + - **For Airbyte Cloud:** **Authenticate your GitHub account** to authorize your GitHub account. Airbyte will authenticate the GitHub account you are already logged in to. Please make sure you are logged into the right account. + + + + - **For Airbyte Open Source:** Authenticate with **Personal Access Token**. To generate a personal access token, log into [GitHub](https://github.com) and then generate a [personal access token](https://github.com/settings/tokens). Enter your GitHub personal access token. To load balance your API quota consumption across multiple API tokens, input multiple tokens separated with `,`. + + +6. **GitHub Repositories** - Enter a list of GitHub organizations/repositories, e.g. 
`airbytehq/airbyte` for single repository, `airbytehq/airbyte airbytehq/another-repo` for multiple repositories. If you want to specify the organization to receive data from all its repositories, then you should specify it according to the following example: `airbytehq/*`. + +:::caution +Repositories with the wrong name or repositories that do not exist or have the wrong name format will be skipped with `WARN` message in the logs. +::: + +7. **Start date (Optional)** - The date from which you'd like to replicate data for streams. For streams which support this configuration, only data generated on or after the start date will be replicated. + +- These streams will only sync records generated on or after the **Start Date**: `comments`, `commit_comment_reactions`, `commit_comments`, `commits`, `deployments`, `events`, `issue_comment_reactions`, `issue_events`, `issue_milestones`, `issue_reactions`, `issues`, `project_cards`, `project_columns`, `projects`, `pull_request_comment_reactions`, `pull_requests`, `pull_requeststats`, `releases`, `review_comments`, `reviews`, `stargazers`, `workflow_runs`, `workflows`. + +- The **Start Date** does not apply to the streams below and all data will be synced for these streams: `assignees`, `branches`, `collaborators`, `issue_labels`, `organizations`, `pull_request_commits`, `pull_request_stats`, `repositories`, `tags`, `teams`, `users` + +8. **Branch (Optional)** - List of GitHub repository branches to pull commits from, e.g. `airbytehq/airbyte/master`. If no branches are specified for a repository, the default branch will be pulled. (e.g. `airbytehq/airbyte/master airbytehq/airbyte/my-branch`). +9. **Max requests per hour (Optional)** - The GitHub API allows for a maximum of 5,000 requests per hour (15,000 for Github Enterprise). You can specify a lower value to limit your use of the API quota. Refer to GitHub article [Rate limits for the REST API](https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api). 
+ + + +## Supported sync modes + +The GitHub source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts/#connection-sync-modes): + +- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) +- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) +- [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) +- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) + +## Supported Streams + +This connector outputs the following full refresh streams: + +- [Assignees](https://docs.github.com/en/rest/issues/assignees?apiVersion=2022-11-28#list-assignees) +- [Branches](https://docs.github.com/en/rest/branches/branches?apiVersion=2022-11-28#list-branches) +- [Contributor Activity](https://docs.github.com/en/rest/metrics/statistics?apiVersion=2022-11-28#get-all-contributor-commit-activity) +- [Collaborators](https://docs.github.com/en/rest/collaborators/collaborators?apiVersion=2022-11-28#list-repository-collaborators) +- [Issue labels](https://docs.github.com/en/rest/issues/labels?apiVersion=2022-11-28#list-labels-for-a-repository) +- [Organizations](https://docs.github.com/en/rest/orgs/orgs?apiVersion=2022-11-28#list-organizations) +- [Pull request commits](https://docs.github.com/en/rest/pulls/pulls?apiVersion=2022-11-28#list-commits-on-a-pull-request) +- [Tags](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-repository-tags) +- [TeamMembers](https://docs.github.com/en/rest/teams/members?apiVersion=2022-11-28#list-team-members) +- [TeamMemberships](https://docs.github.com/en/rest/teams/members?apiVersion=2022-11-28#get-team-membership-for-a-user) +- [Teams](https://docs.github.com/en/rest/teams/teams?apiVersion=2022-11-28#list-teams) +- 
[Users](https://docs.github.com/en/rest/orgs/members?apiVersion=2022-11-28#list-organization-members) +- [Issue timeline events](https://docs.github.com/en/rest/issues/timeline?apiVersion=2022-11-28#list-timeline-events-for-an-issue) + +This connector outputs the following incremental streams: + +- [Comments](https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#list-issue-comments-for-a-repository) +- [Commit comment reactions](https://docs.github.com/en/rest/reference/reactions?apiVersion=2022-11-28#list-reactions-for-a-commit-comment) +- [Commit comments](https://docs.github.com/en/rest/commits/comments?apiVersion=2022-11-28#list-commit-comments-for-a-repository) +- [Commits](https://docs.github.com/en/rest/commits/commits?apiVersion=2022-11-28#list-commits) +- [Deployments](https://docs.github.com/en/rest/deployments/deployments?apiVersion=2022-11-28#list-deployments) +- [Events](https://docs.github.com/en/rest/activity/events?apiVersion=2022-11-28#list-repository-events) +- [Issue comment reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-an-issue-comment) +- [Issue events](https://docs.github.com/en/rest/issues/events?apiVersion=2022-11-28#list-issue-events-for-a-repository) +- [Issue milestones](https://docs.github.com/en/rest/issues/milestones?apiVersion=2022-11-28#list-milestones) +- [Issue reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-an-issue) +- [Issues](https://docs.github.com/en/rest/issues/issues?apiVersion=2022-11-28#list-repository-issues) +- [Project (Classic) cards](https://docs.github.com/en/rest/projects/cards?apiVersion=2022-11-28#list-project-cards) +- [Project (Classic) columns](https://docs.github.com/en/rest/projects/columns?apiVersion=2022-11-28#list-project-columns) +- [Projects (Classic)](https://docs.github.com/en/rest/projects/projects?apiVersion=2022-11-28#list-repository-projects) +- 
[ProjectsV2](https://docs.github.com/en/graphql/reference/objects#projectv2) +- [Pull request comment reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-a-pull-request-review-comment) +- [Pull request stats](https://docs.github.com/en/graphql/reference/objects#pullrequest) +- [Pull requests](https://docs.github.com/en/rest/pulls/pulls?apiVersion=2022-11-28#list-pull-requests) +- [Releases](https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#list-releases) +- [Repositories](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-organization-repositories) +- [Review comments](https://docs.github.com/en/rest/pulls/comments?apiVersion=2022-11-28#list-review-comments-in-a-repository) +- [Reviews](https://docs.github.com/en/rest/pulls/reviews?apiVersion=2022-11-28#list-reviews-for-a-pull-request) +- [Stargazers](https://docs.github.com/en/rest/activity/starring?apiVersion=2022-11-28#list-stargazers) +- [WorkflowJobs](https://docs.github.com/pt/rest/actions/workflow-jobs?apiVersion=2022-11-28#list-jobs-for-a-workflow-run) +- [WorkflowRuns](https://docs.github.com/en/rest/actions/workflow-runs?apiVersion=2022-11-28#list-workflow-runs-for-a-repository) +- [Workflows](https://docs.github.com/en/rest/actions/workflows?apiVersion=2022-11-28#list-repository-workflows) + +### Notes + +1. Only 4 streams \(`comments`, `commits`, `issues` and `review comments`\) from the listed above streams are pure incremental meaning that they: + + - read only new records; + - output only new records. + +2. 
Streams `workflow_runs` and `workflow_jobs` are almost pure incremental: + + - read new records and some portion of old records (in the past 30 days) [docs](https://docs.github.com/en/actions/managing-workflow-runs/re-running-workflows-and-jobs); + - the `workflow_jobs` depends on the `workflow_runs` to read the data, so they both follow the same logic [docs](https://docs.github.com/pt/rest/actions/workflow-jobs#list-jobs-for-a-workflow-run); + - output only new records. + +3. Other 19 incremental streams are also incremental but with one difference, they: + + - read all records; + - output only new records. + Please, consider this behaviour when using those 19 incremental streams because it may affect your API call limits. + +4. Sometimes for large streams specifying a very distant `start_date` in the past may result in repeated errors from GitHub instead of records \(a respective `WARN` log message will be output\). In this case, specifying a more recent `start_date` may help. + **The "Start date" configuration option does not apply to the streams below, because the GitHub API does not include dates which can be used for filtering:** + +- `assignees` +- `branches` +- `collaborators` +- `issue_labels` +- `organizations` +- `pull_request_commits` +- `pull_request_stats` +- `repositories` +- `tags` +- `teams` +- `users` + +## Limitations & Troubleshooting + +
+ +Expand to see details about GitHub connector limitations and troubleshooting. + + +### Connector limitations + +#### Rate limiting +The GitHub connector should not run into GitHub API limitations under normal usage. Please [create an issue](https://github.com/airbytehq/airbyte/issues) if you see any rate limit issues that are not automatically retried successfully. Refer to the GitHub article [Rate limits for the REST API](https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api). + +#### Permissions and scopes + +If you use the OAuth authentication method, the OAuth2.0 application requests the following list of [scopes](https://docs.github.com/en/developers/apps/building-oauth-apps/scopes-for-oauth-apps#available-scopes): **repo**, **read:org**, **read:repo_hook**, **read:user**, **read:discussion**, **workflow**. For a [personal access token](https://github.com/settings/tokens), you need to manually select the needed scopes. + +Your token should have at least the `repo` scope. Depending on which streams you want to sync, the user generating the token needs more permissions: + +- For syncing Collaborators, the user who generates the personal access token must be a collaborator. To become a collaborator, they must be invited by an owner. If there are no collaborators, no records will be synced. Read more about access permissions [here](https://docs.github.com/en/get-started/learning-about-github/access-permissions-on-github). +- Syncing [Teams](https://docs.github.com/en/organizations/organizing-members-into-teams/about-teams) is only available to authenticated members of a team's [organization](https://docs.github.com/en/rest/orgs). [Personal user accounts](https://docs.github.com/en/get-started/learning-about-github/types-of-github-accounts) and repositories belonging to them don't have access to Teams features. In this case no records will be synced. +- To sync the Projects stream, the repository must have the Projects feature enabled. 
+ +### Troubleshooting + +* Check out common troubleshooting issues for the GitHub source connector on our [Airbyte Forum](https://github.com/airbytehq/airbyte/discussions) + +
+ +## Changelog + +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 1.5.5 | 2023-12-26 | [33783](https://github.com/airbytehq/airbyte/pull/33783) | Fix retry for 504 error in GraphQL based streams | +| 1.5.4 | 2023-11-20 | [32679](https://github.com/airbytehq/airbyte/pull/32679) | Return AirbyteMessage if max retry exeeded for 202 status code | +| 1.5.3 | 2023-10-23 | [31702](https://github.com/airbytehq/airbyte/pull/31702) | Base image migration: remove Dockerfile and use the python-connector-base image | +| 1.5.2 | 2023-10-13 | [31386](https://github.com/airbytehq/airbyte/pull/31386) | Handle `ContributorActivity` continuous `ACCEPTED` response | +| 1.5.1 | 2023-10-12 | [31307](https://github.com/airbytehq/airbyte/pull/31307) | Increase backoff_time for stream `ContributorActivity` | +| 1.5.0 | 2023-10-11 | [31300](https://github.com/airbytehq/airbyte/pull/31300) | Update Schemas: Add date-time format to fields | +| 1.4.6 | 2023-10-04 | [31056](https://github.com/airbytehq/airbyte/pull/31056) | Migrate spec properties' `repository` and `branch` type to \ | +| 1.4.5 | 2023-10-02 | [31023](https://github.com/airbytehq/airbyte/pull/31023) | Increase backoff for stream `Contributor Activity` | +| 1.4.4 | 2023-10-02 | [30971](https://github.com/airbytehq/airbyte/pull/30971) | Mark `start_date` as optional. 
| +| 1.4.3 | 2023-10-02 | [30979](https://github.com/airbytehq/airbyte/pull/30979) | Fetch archived records in `Project Cards` | +| 1.4.2 | 2023-09-30 | [30927](https://github.com/airbytehq/airbyte/pull/30927) | Provide actionable user error messages | +| 1.4.1 | 2023-09-30 | [30839](https://github.com/airbytehq/airbyte/pull/30839) | Update CDK to Latest version | +| 1.4.0 | 2023-09-29 | [30823](https://github.com/airbytehq/airbyte/pull/30823) | Add new stream `issue Timeline Events` | +| 1.3.1 | 2023-09-28 | [30824](https://github.com/airbytehq/airbyte/pull/30824) | Handle empty response in stream `ContributorActivity` | +| 1.3.0 | 2023-09-25 | [30731](https://github.com/airbytehq/airbyte/pull/30731) | Add new stream `ProjectsV2` | +| 1.2.1 | 2023-09-22 | [30693](https://github.com/airbytehq/airbyte/pull/30693) | Handle 404 error in `TeamMemberShips` | +| 1.2.0 | 2023-09-22 | [30647](https://github.com/airbytehq/airbyte/pull/30647) | Add support for self-hosted GitHub instances | +| 1.1.1 | 2023-09-21 | [30654](https://github.com/airbytehq/airbyte/pull/30654) | Rewrite source connection error messages | +| 1.1.0 | 2023-08-03 | [30615](https://github.com/airbytehq/airbyte/pull/30615) | Add new stream `Contributor Activity` | +| 1.0.4 | 2023-08-03 | [29031](https://github.com/airbytehq/airbyte/pull/29031) | Reverted `advancedAuth` spec changes | +| 1.0.3 | 2023-08-01 | [28910](https://github.com/airbytehq/airbyte/pull/28910) | Updated `advancedAuth` broken references | +| 1.0.2 | 2023-07-11 | [28144](https://github.com/airbytehq/airbyte/pull/28144) | Add `archived_at` property to `Organizations` schema parameter | +| 1.0.1 | 2023-05-22 | [25838](https://github.com/airbytehq/airbyte/pull/25838) | Deprecate "page size" input parameter | +| 1.0.0 | 2023-05-19 | [25778](https://github.com/airbytehq/airbyte/pull/25778) | Improve repo(s) name validation on UI | +| 0.5.0 | 2023-05-16 | [25793](https://github.com/airbytehq/airbyte/pull/25793) | Implement client-side 
throttling of requests | +| 0.4.11 | 2023-05-12 | [26025](https://github.com/airbytehq/airbyte/pull/26025) | Added more transparent depiction of the personal access token expired | +| 0.4.10 | 2023-05-15 | [26075](https://github.com/airbytehq/airbyte/pull/26075) | Add more specific error message description for no repos case. | +| 0.4.9 | 2023-05-01 | [24523](https://github.com/airbytehq/airbyte/pull/24523) | Add undeclared columns to spec | +| 0.4.8 | 2023-04-19 | [00000](https://github.com/airbytehq/airbyte/pull/25312) | Fix repo name validation | +| 0.4.7 | 2023-03-24 | [24457](https://github.com/airbytehq/airbyte/pull/24457) | Add validation and transformation for repositories config | +| 0.4.6 | 2023-03-24 | [24398](https://github.com/airbytehq/airbyte/pull/24398) | Fix caching for `get_starting_point` in stream "Commits" | +| 0.4.5 | 2023-03-23 | [24417](https://github.com/airbytehq/airbyte/pull/24417) | Add pattern_descriptors to fields with an expected format | +| 0.4.4 | 2023-03-17 | [24255](https://github.com/airbytehq/airbyte/pull/24255) | Add field groups and titles to improve display of connector setup form | +| 0.4.3 | 2023-03-04 | [22993](https://github.com/airbytehq/airbyte/pull/22993) | Specified date formatting in specification | +| 0.4.2 | 2023-03-03 | [23467](https://github.com/airbytehq/airbyte/pull/23467) | added user friendly messages, added AirbyteTracedException config_error, updated SAT | +| 0.4.1 | 2023-01-27 | [22039](https://github.com/airbytehq/airbyte/pull/22039) | Set `AvailabilityStrategy` for streams explicitly to `None` | +| 0.4.0 | 2023-01-20 | [21457](https://github.com/airbytehq/airbyte/pull/21457) | Use GraphQL for `issue_reactions` stream | +| 0.3.12 | 2023-01-18 | [21481](https://github.com/airbytehq/airbyte/pull/21481) | Handle 502 Bad Gateway error with proper log message | +| 0.3.11 | 2023-01-06 | [21084](https://github.com/airbytehq/airbyte/pull/21084) | Raise Error if no organizations or repos are available during read 
| +| 0.3.10 | 2022-12-15 | [20523](https://github.com/airbytehq/airbyte/pull/20523) | Revert changes from 0.3.9 | +| 0.3.9 | 2022-12-14 | [19978](https://github.com/airbytehq/airbyte/pull/19978) | Update CDK dependency; move custom HTTPError handling into `AvailabilityStrategy` classes | +| 0.3.8 | 2022-11-10 | [19299](https://github.com/airbytehq/airbyte/pull/19299) | Fix events and workflow_runs datetimes | +| 0.3.7 | 2022-10-20 | [18213](https://github.com/airbytehq/airbyte/pull/18213) | Skip retry on HTTP 200 | +| 0.3.6 | 2022-10-11 | [17852](https://github.com/airbytehq/airbyte/pull/17852) | Use default behaviour, retry on 429 and all 5XX errors | +| 0.3.5 | 2022-10-07 | [17715](https://github.com/airbytehq/airbyte/pull/17715) | Improve 502 handling for `comments` stream | +| 0.3.4 | 2022-10-04 | [17555](https://github.com/airbytehq/airbyte/pull/17555) | Skip repository if got HTTP 500 for WorkflowRuns stream | +| 0.3.3 | 2022-09-28 | [17287](https://github.com/airbytehq/airbyte/pull/17287) | Fix problem with "null" `cursor_field` for WorkflowJobs stream | +| 0.3.2 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state. 
| +| 0.3.1 | 2022-09-21 | [16947](https://github.com/airbytehq/airbyte/pull/16947) | Improve error logging when handling HTTP 500 error | +| 0.3.0 | 2022-09-09 | [16534](https://github.com/airbytehq/airbyte/pull/16534) | Add new stream `WorkflowJobs` | +| 0.2.46 | 2022-08-17 | [15730](https://github.com/airbytehq/airbyte/pull/15730) | Validate input organizations and repositories | +| 0.2.45 | 2022-08-11 | [15420](https://github.com/airbytehq/airbyte/pull/15420) | "User" object can be "null" | +| 0.2.44 | 2022-08-01 | [14795](https://github.com/airbytehq/airbyte/pull/14795) | Use GraphQL for `pull_request_comment_reactions` stream | +| 0.2.43 | 2022-07-26 | [15049](https://github.com/airbytehq/airbyte/pull/15049) | Bugfix schemas for streams `deployments`, `workflow_runs`, `teams` | +| 0.2.42 | 2022-07-12 | [14613](https://github.com/airbytehq/airbyte/pull/14613) | Improve schema for stream `pull_request_commits` added "null" | +| 0.2.41 | 2022-07-03 | [14376](https://github.com/airbytehq/airbyte/pull/14376) | Add Retry for GraphQL API Resource limitations | +| 0.2.40 | 2022-07-01 | [14338](https://github.com/airbytehq/airbyte/pull/14338) | Revert: "Rename field `mergeable` to `is_mergeable`" | +| 0.2.39 | 2022-06-30 | [14274](https://github.com/airbytehq/airbyte/pull/14274) | Rename field `mergeable` to `is_mergeable` | +| 0.2.38 | 2022-06-27 | [13989](https://github.com/airbytehq/airbyte/pull/13989) | Use GraphQL for `reviews` stream | +| 0.2.37 | 2022-06-21 | [13955](https://github.com/airbytehq/airbyte/pull/13955) | Fix "secondary rate limit" not retrying | +| 0.2.36 | 2022-06-20 | [13926](https://github.com/airbytehq/airbyte/pull/13926) | Break point added for `workflows_runs` stream | +| 0.2.35 | 2022-06-16 | [13763](https://github.com/airbytehq/airbyte/pull/13763) | Use GraphQL for `pull_request_stats` stream | +| 0.2.34 | 2022-06-14 | [13707](https://github.com/airbytehq/airbyte/pull/13707) | Fix API sorting, fix `get_starting_point` caching | +| 0.2.33 | 
2022-06-08 | [13558](https://github.com/airbytehq/airbyte/pull/13558) | Enable caching only for parent streams | +| 0.2.32 | 2022-06-07 | [13531](https://github.com/airbytehq/airbyte/pull/13531) | Fix different result from `get_starting_point` when reading by pages | +| 0.2.31 | 2022-05-24 | [13115](https://github.com/airbytehq/airbyte/pull/13115) | Add incremental support for streams `WorkflowRuns` | +| 0.2.30 | 2022-05-09 | [12294](https://github.com/airbytehq/airbyte/pull/12294) | Add incremental support for streams `CommitCommentReactions`, `IssueCommentReactions`, `IssueReactions`, `PullRequestCommentReactions`, `Repositories`, `Workflows` | +| 0.2.29 | 2022-05-04 | [12482](https://github.com/airbytehq/airbyte/pull/12482) | Update input configuration copy | +| 0.2.28 | 2022-04-21 | [11893](https://github.com/airbytehq/airbyte/pull/11893) | Add new streams `TeamMembers`, `TeamMemberships` | +| 0.2.27 | 2022-04-02 | [11678](https://github.com/airbytehq/airbyte/pull/11678) | Fix "PAT Credentials" in spec | +| 0.2.26 | 2022-03-31 | [11623](https://github.com/airbytehq/airbyte/pull/11623) | Re-factored incremental sync for `Reviews` stream | +| 0.2.25 | 2022-03-31 | [11567](https://github.com/airbytehq/airbyte/pull/11567) | Improve code for better error handling | +| 0.2.24 | 2022-03-30 | [9251](https://github.com/airbytehq/airbyte/pull/9251) | Add Streams Workflow and WorkflowRuns | +| 0.2.23 | 2022-03-17 | [11212](https://github.com/airbytehq/airbyte/pull/11212) | Improve documentation and spec for Beta | +| 0.2.22 | 2022-03-10 | [10878](https://github.com/airbytehq/airbyte/pull/10878) | Fix error handling for unavailable streams with 404 status code | +| 0.2.21 | 2022-03-04 | [10749](https://github.com/airbytehq/airbyte/pull/10749) | Add new stream `ProjectCards` | +| 0.2.20 | 2022-02-16 | [10385](https://github.com/airbytehq/airbyte/pull/10385) | Add new stream `Deployments`, `ProjectColumns`, `PullRequestCommits` | +| 0.2.19 | 2022-02-07 | 
[10211](https://github.com/airbytehq/airbyte/pull/10211) | Add human-readable error in case of incorrect organization or repo name | +| 0.2.18 | 2021-02-09 | [10193](https://github.com/airbytehq/airbyte/pull/10193) | Add handling secondary rate limits | +| 0.2.17 | 2021-02-02 | [9999](https://github.com/airbytehq/airbyte/pull/9999) | Remove BAD_GATEWAY code from backoff_time | +| 0.2.16 | 2021-02-02 | [9868](https://github.com/airbytehq/airbyte/pull/9868) | Add log message for streams that are restricted for OAuth. Update oauth scopes. | +| 0.2.15 | 2021-01-26 | [9802](https://github.com/airbytehq/airbyte/pull/9802) | Add missing fields for auto_merge in pull request stream | +| 0.2.14 | 2021-01-21 | [9664](https://github.com/airbytehq/airbyte/pull/9664) | Add custom pagination size for large streams | +| 0.2.13 | 2021-01-20 | [9619](https://github.com/airbytehq/airbyte/pull/9619) | Fix logging for function `should_retry` | +| 0.2.11 | 2021-01-17 | [9492](https://github.com/airbytehq/airbyte/pull/9492) | Remove optional parameter `Accept` for reaction`s streams to fix error with 502 HTTP status code in response | +| 0.2.10 | 2021-01-03 | [7250](https://github.com/airbytehq/airbyte/pull/7250) | Use CDK caching and convert PR-related streams to incremental | +| 0.2.9 | 2021-12-29 | [9179](https://github.com/airbytehq/airbyte/pull/9179) | Use default retry delays on server error responses | +| 0.2.8 | 2021-12-07 | [8524](https://github.com/airbytehq/airbyte/pull/8524) | Update connector fields title/description | +| 0.2.7 | 2021-12-06 | [8518](https://github.com/airbytehq/airbyte/pull/8518) | Add connection retry with GitHub | +| 0.2.6 | 2021-11-24 | [8030](https://github.com/airbytehq/airbyte/pull/8030) | Support start date property for PullRequestStats and Reviews streams | +| 0.2.5 | 2021-11-21 | [8170](https://github.com/airbytehq/airbyte/pull/8170) | Fix slow check connection for organizations with a lot of repos | +| 0.2.4 | 2021-11-11 | 
[7856](https://github.com/airbytehq/airbyte/pull/7856) | Resolve $ref fields in some stream schemas | +| 0.2.3 | 2021-10-06 | [6833](https://github.com/airbytehq/airbyte/pull/6833) | Fix config backward compatability | +| 0.2.2 | 2021-10-05 | [6761](https://github.com/airbytehq/airbyte/pull/6761) | Add oauth worflow specification | +| 0.2.1 | 2021-09-22 | [6223](https://github.com/airbytehq/airbyte/pull/6223) | Add option to pull commits from user-specified branches | +| 0.2.0 | 2021-09-19 | [5898](https://github.com/airbytehq/airbyte/pull/5898) and [6227](https://github.com/airbytehq/airbyte/pull/6227) | Don't minimize any output fields & add better error handling | +| 0.1.11 | 2021-09-15 | [5949](https://github.com/airbytehq/airbyte/pull/5949) | Add caching for all streams | +| 0.1.10 | 2021-09-09 | [5860](https://github.com/airbytehq/airbyte/pull/5860) | Add reaction streams | +| 0.1.9 | 2021-09-02 | [5788](https://github.com/airbytehq/airbyte/pull/5788) | Handling empty repository, check method using RepositoryStats stream | +| 0.1.8 | 2021-09-01 | [5757](https://github.com/airbytehq/airbyte/pull/5757) | Add more streams | +| 0.1.7 | 2021-08-27 | [5696](https://github.com/airbytehq/airbyte/pull/5696) | Handle negative backoff values | +| 0.1.6 | 2021-08-18 | [5456](https://github.com/airbytehq/airbyte/pull/5223) | Add MultipleTokenAuthenticator | +| 0.1.5 | 2021-08-18 | [5456](https://github.com/airbytehq/airbyte/pull/5456) | Fix set up validation | +| 0.1.4 | 2021-08-13 | [5136](https://github.com/airbytehq/airbyte/pull/5136) | Support syncing multiple repositories/organizations | +| 0.1.3 | 2021-08-03 | [5156](https://github.com/airbytehq/airbyte/pull/5156) | Extended existing schemas with `users` property for certain streams | +| 0.1.2 | 2021-07-13 | [4708](https://github.com/airbytehq/airbyte/pull/4708) | Fix bug with IssueEvents stream and add handling for rate limiting | +| 0.1.1 | 2021-07-07 | [4590](https://github.com/airbytehq/airbyte/pull/4590) | Fix 
schema in the `pull_request` stream | +| 0.1.0 | 2021-07-06 | [4174](https://github.com/airbytehq/airbyte/pull/4174) | New Source: GitHub | + +
\ No newline at end of file diff --git a/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/correct_all_description_exist.md b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/correct_all_description_exist.md new file mode 100644 index 000000000000..43110297ed9d --- /dev/null +++ b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/correct_all_description_exist.md @@ -0,0 +1,38 @@ +# GitHub + + + +This page contains the setup guide and reference information for the [GitHub](https://www.github.com) source connector. + + + +## For Airbyte Cloud: + +1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account. +2. Click Sources and then click + New source/destination. +3. On the Set up the source page, select GitHub from the Source type dropdown. +4. Enter a name for the GitHub connector. +5. Add list of GitHub repositories you want to sync. +6. Add Start Date from with data will be replicated. + +## For Airbyte Open Source: + +1. Navigate to the Airbyte Open Source dashboard. +2. Click Sources and then click + New source/destination. +3. On the Set up the source page, select GitHub from the Source type dropdown. +4. Enter a name for the GitHub connector. +5. Add list of GitHub repositories you want to sync. +6. Add Start Date from with data will be replicated. 
+ +## Supported sync modes + +The GitHub source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts/#connection-sync-modes): + +- Full Refresh +- Incremental + +## Tutorials + +Now that you have set up the GitHub source connector, check out the following GitHub tutorials: + +- [Creating PAT](https://docs.github.com/en/enterprise-server@3.9/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens) diff --git a/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/incorrect_header_order.md b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/incorrect_header_order.md new file mode 100644 index 000000000000..3feb237735a7 --- /dev/null +++ b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/incorrect_header_order.md @@ -0,0 +1,306 @@ +## Prerequisites + +- List of GitHub Repositories (and access for them in case they are private) +- +# GitHub + + + +This page contains the setup guide and reference information for the [GitHub](https://www.github.com) source connector. + + + + + +**For Airbyte Cloud:** + +- OAuth +- Personal Access Token (see [Permissions and scopes](https://docs.airbyte.com/integrations/sources/github#permissions-and-scopes)) + + + +**For Airbyte Open Source:** + +- Personal Access Token (see [Permissions and scopes](https://docs.airbyte.com/integrations/sources/github#permissions-and-scopes)) + + +### Step 1: Set up GitHub + +## Setup guide + +Create a [GitHub Account](https://github.com). + + +**Airbyte Open Source additional setup steps** + +Log into [GitHub](https://github.com) and then generate a [personal access token](https://github.com/settings/tokens). To load balance your API quota consumption across multiple API tokens, input multiple tokens separated with `,`. + + +### Step 2: Set up the GitHub connector in Airbyte + + +**For Airbyte Cloud:** + +1. 
[Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account. +2. In the left navigation bar, click **Sources**. +3. On the source selection page, select **GitHub** from the list of Sources. +4. Add a name for your GitHub connector. +5. To authenticate: + + + - **For Airbyte Cloud:** **Authenticate your GitHub account** to authorize your GitHub account. Airbyte will authenticate the GitHub account you are already logged in to. Please make sure you are logged into the right account. + + + + - **For Airbyte Open Source:** Authenticate with **Personal Access Token**. To generate a personal access token, log into [GitHub](https://github.com) and then generate a [personal access token](https://github.com/settings/tokens). Enter your GitHub personal access token. To load balance your API quota consumption across multiple API tokens, input multiple tokens separated with `,`. + + +6. **GitHub Repositories** - Enter a list of GitHub organizations/repositories, e.g. `airbytehq/airbyte` for single repository, `airbytehq/airbyte airbytehq/another-repo` for multiple repositories. If you want to specify the organization to receive data from all its repositories, then you should specify it according to the following example: `airbytehq/*`. + +:::caution +Repositories with the wrong name or repositories that do not exist or have the wrong name format will be skipped with `WARN` message in the logs. +::: + +7. **Start date (Optional)** - The date from which you'd like to replicate data for streams. For streams which support this configuration, only data generated on or after the start date will be replicated. 
+ +- These streams will only sync records generated on or after the **Start Date**: `comments`, `commit_comment_reactions`, `commit_comments`, `commits`, `deployments`, `events`, `issue_comment_reactions`, `issue_events`, `issue_milestones`, `issue_reactions`, `issues`, `project_cards`, `project_columns`, `projects`, `pull_request_comment_reactions`, `pull_requests`, `pull_requeststats`, `releases`, `review_comments`, `reviews`, `stargazers`, `workflow_runs`, `workflows`. + +- The **Start Date** does not apply to the streams below and all data will be synced for these streams: `assignees`, `branches`, `collaborators`, `issue_labels`, `organizations`, `pull_request_commits`, `pull_request_stats`, `repositories`, `tags`, `teams`, `users` + +8. **Branch (Optional)** - List of GitHub repository branches to pull commits from, e.g. `airbytehq/airbyte/master`. If no branches are specified for a repository, the default branch will be pulled. (e.g. `airbytehq/airbyte/master airbytehq/airbyte/my-branch`). +9. **Max requests per hour (Optional)** - The GitHub API allows for a maximum of 5,000 requests per hour (15,000 for Github Enterprise). You can specify a lower value to limit your use of the API quota. Refer to GitHub article [Rate limits for the REST API](https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api). 
+ + + +## Supported sync modes + +The GitHub source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts/#connection-sync-modes): + +- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) +- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) +- [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) +- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) + +## Supported Streams + +This connector outputs the following full refresh streams: + +- [Assignees](https://docs.github.com/en/rest/issues/assignees?apiVersion=2022-11-28#list-assignees) +- [Branches](https://docs.github.com/en/rest/branches/branches?apiVersion=2022-11-28#list-branches) +- [Contributor Activity](https://docs.github.com/en/rest/metrics/statistics?apiVersion=2022-11-28#get-all-contributor-commit-activity) +- [Collaborators](https://docs.github.com/en/rest/collaborators/collaborators?apiVersion=2022-11-28#list-repository-collaborators) +- [Issue labels](https://docs.github.com/en/rest/issues/labels?apiVersion=2022-11-28#list-labels-for-a-repository) +- [Organizations](https://docs.github.com/en/rest/orgs/orgs?apiVersion=2022-11-28#list-organizations) +- [Pull request commits](https://docs.github.com/en/rest/pulls/pulls?apiVersion=2022-11-28#list-commits-on-a-pull-request) +- [Tags](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-repository-tags) +- [TeamMembers](https://docs.github.com/en/rest/teams/members?apiVersion=2022-11-28#list-team-members) +- [TeamMemberships](https://docs.github.com/en/rest/teams/members?apiVersion=2022-11-28#get-team-membership-for-a-user) +- [Teams](https://docs.github.com/en/rest/teams/teams?apiVersion=2022-11-28#list-teams) +- 
[Users](https://docs.github.com/en/rest/orgs/members?apiVersion=2022-11-28#list-organization-members) +- [Issue timeline events](https://docs.github.com/en/rest/issues/timeline?apiVersion=2022-11-28#list-timeline-events-for-an-issue) + +This connector outputs the following incremental streams: + +- [Comments](https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#list-issue-comments-for-a-repository) +- [Commit comment reactions](https://docs.github.com/en/rest/reference/reactions?apiVersion=2022-11-28#list-reactions-for-a-commit-comment) +- [Commit comments](https://docs.github.com/en/rest/commits/comments?apiVersion=2022-11-28#list-commit-comments-for-a-repository) +- [Commits](https://docs.github.com/en/rest/commits/commits?apiVersion=2022-11-28#list-commits) +- [Deployments](https://docs.github.com/en/rest/deployments/deployments?apiVersion=2022-11-28#list-deployments) +- [Events](https://docs.github.com/en/rest/activity/events?apiVersion=2022-11-28#list-repository-events) +- [Issue comment reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-an-issue-comment) +- [Issue events](https://docs.github.com/en/rest/issues/events?apiVersion=2022-11-28#list-issue-events-for-a-repository) +- [Issue milestones](https://docs.github.com/en/rest/issues/milestones?apiVersion=2022-11-28#list-milestones) +- [Issue reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-an-issue) +- [Issues](https://docs.github.com/en/rest/issues/issues?apiVersion=2022-11-28#list-repository-issues) +- [Project (Classic) cards](https://docs.github.com/en/rest/projects/cards?apiVersion=2022-11-28#list-project-cards) +- [Project (Classic) columns](https://docs.github.com/en/rest/projects/columns?apiVersion=2022-11-28#list-project-columns) +- [Projects (Classic)](https://docs.github.com/en/rest/projects/projects?apiVersion=2022-11-28#list-repository-projects) +- 
[ProjectsV2](https://docs.github.com/en/graphql/reference/objects#projectv2) +- [Pull request comment reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-a-pull-request-review-comment) +- [Pull request stats](https://docs.github.com/en/graphql/reference/objects#pullrequest) +- [Pull requests](https://docs.github.com/en/rest/pulls/pulls?apiVersion=2022-11-28#list-pull-requests) +- [Releases](https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#list-releases) +- [Repositories](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-organization-repositories) +- [Review comments](https://docs.github.com/en/rest/pulls/comments?apiVersion=2022-11-28#list-review-comments-in-a-repository) +- [Reviews](https://docs.github.com/en/rest/pulls/reviews?apiVersion=2022-11-28#list-reviews-for-a-pull-request) +- [Stargazers](https://docs.github.com/en/rest/activity/starring?apiVersion=2022-11-28#list-stargazers) +- [WorkflowJobs](https://docs.github.com/pt/rest/actions/workflow-jobs?apiVersion=2022-11-28#list-jobs-for-a-workflow-run) +- [WorkflowRuns](https://docs.github.com/en/rest/actions/workflow-runs?apiVersion=2022-11-28#list-workflow-runs-for-a-repository) +- [Workflows](https://docs.github.com/en/rest/actions/workflows?apiVersion=2022-11-28#list-repository-workflows) + +### Notes + +1. Only 4 streams \(`comments`, `commits`, `issues` and `review comments`\) from the listed above streams are pure incremental meaning that they: + + - read only new records; + - output only new records. + +2. 
Streams `workflow_runs` and `workflow_jobs` are almost pure incremental: + + - read new records and some portion of old records (in the past 30 days) [docs](https://docs.github.com/en/actions/managing-workflow-runs/re-running-workflows-and-jobs); + - the `workflow_jobs` depends on the `workflow_runs` to read the data, so they both follow the same logic [docs](https://docs.github.com/pt/rest/actions/workflow-jobs#list-jobs-for-a-workflow-run); + - output only new records. + +3. Other 19 incremental streams are also incremental but with one difference, they: + + - read all records; + - output only new records. + Please, consider this behaviour when using those 19 incremental streams because it may affect your API call limits. + +4. Sometimes for large streams specifying a very distant `start_date` in the past may result in repeated errors from GitHub instead of records \(a respective `WARN` log message will be output\). In this case, specifying a more recent `start_date` may help. + **The "Start date" configuration option does not apply to the streams below, because the GitHub API does not include dates which can be used for filtering:** + +- `assignees` +- `branches` +- `collaborators` +- `issue_labels` +- `organizations` +- `pull_request_commits` +- `pull_request_stats` +- `repositories` +- `tags` +- `teams` +- `users` + +## Limitations & Troubleshooting + +
+ +Expand to see details about GitHub connector limitations and troubleshooting. + + +### Connector limitations + +#### Rate limiting +The GitHub connector should not run into GitHub API limitations under normal usage. Please [create an issue](https://github.com/airbytehq/airbyte/issues) if you see any rate limit issues that are not automatically retried successfully. Refer to the GitHub article [Rate limits for the REST API](https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api). + +#### Permissions and scopes + +If you use the OAuth authentication method, the OAuth2.0 application requests the following list of [scopes](https://docs.github.com/en/developers/apps/building-oauth-apps/scopes-for-oauth-apps#available-scopes): **repo**, **read:org**, **read:repo_hook**, **read:user**, **read:discussion**, **workflow**. For a [personal access token](https://github.com/settings/tokens), you need to manually select the needed scopes. + +Your token should have at least the `repo` scope. Depending on which streams you want to sync, the user generating the token needs more permissions: + +- For syncing Collaborators, the user who generates the personal access token must be a collaborator. To become a collaborator, they must be invited by an owner. If there are no collaborators, no records will be synced. Read more about access permissions [here](https://docs.github.com/en/get-started/learning-about-github/access-permissions-on-github). +- Syncing [Teams](https://docs.github.com/en/organizations/organizing-members-into-teams/about-teams) is only available to authenticated members of a team's [organization](https://docs.github.com/en/rest/orgs). [Personal user accounts](https://docs.github.com/en/get-started/learning-about-github/types-of-github-accounts) and repositories belonging to them don't have access to Teams features. In this case no records will be synced. +- To sync the Projects stream, the repository must have the Projects feature enabled. 
+ +### Troubleshooting + +* Check out common troubleshooting issues for the GitHub source connector on our [Airbyte Forum](https://github.com/airbytehq/airbyte/discussions) + +
+ +## Changelog + +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 1.5.5 | 2023-12-26 | [33783](https://github.com/airbytehq/airbyte/pull/33783) | Fix retry for 504 error in GraphQL based streams | +| 1.5.4 | 2023-11-20 | [32679](https://github.com/airbytehq/airbyte/pull/32679) | Return AirbyteMessage if max retry exceeded for 202 status code | +| 1.5.3 | 2023-10-23 | [31702](https://github.com/airbytehq/airbyte/pull/31702) | Base image migration: remove Dockerfile and use the python-connector-base image | +| 1.5.2 | 2023-10-13 | [31386](https://github.com/airbytehq/airbyte/pull/31386) | Handle `ContributorActivity` continuous `ACCEPTED` response | +| 1.5.1 | 2023-10-12 | [31307](https://github.com/airbytehq/airbyte/pull/31307) | Increase backoff_time for stream `ContributorActivity` | +| 1.5.0 | 2023-10-11 | [31300](https://github.com/airbytehq/airbyte/pull/31300) | Update Schemas: Add date-time format to fields | +| 1.4.6 | 2023-10-04 | [31056](https://github.com/airbytehq/airbyte/pull/31056) | Migrate spec properties' `repository` and `branch` type to \<array\> | +| 1.4.5 | 2023-10-02 | [31023](https://github.com/airbytehq/airbyte/pull/31023) | Increase backoff for stream `Contributor Activity` | +| 1.4.4 | 2023-10-02 | [30971](https://github.com/airbytehq/airbyte/pull/30971) | Mark `start_date` as optional. 
| +| 1.4.3 | 2023-10-02 | [30979](https://github.com/airbytehq/airbyte/pull/30979) | Fetch archived records in `Project Cards` | +| 1.4.2 | 2023-09-30 | [30927](https://github.com/airbytehq/airbyte/pull/30927) | Provide actionable user error messages | +| 1.4.1 | 2023-09-30 | [30839](https://github.com/airbytehq/airbyte/pull/30839) | Update CDK to Latest version | +| 1.4.0 | 2023-09-29 | [30823](https://github.com/airbytehq/airbyte/pull/30823) | Add new stream `issue Timeline Events` | +| 1.3.1 | 2023-09-28 | [30824](https://github.com/airbytehq/airbyte/pull/30824) | Handle empty response in stream `ContributorActivity` | +| 1.3.0 | 2023-09-25 | [30731](https://github.com/airbytehq/airbyte/pull/30731) | Add new stream `ProjectsV2` | +| 1.2.1 | 2023-09-22 | [30693](https://github.com/airbytehq/airbyte/pull/30693) | Handle 404 error in `TeamMemberShips` | +| 1.2.0 | 2023-09-22 | [30647](https://github.com/airbytehq/airbyte/pull/30647) | Add support for self-hosted GitHub instances | +| 1.1.1 | 2023-09-21 | [30654](https://github.com/airbytehq/airbyte/pull/30654) | Rewrite source connection error messages | +| 1.1.0 | 2023-08-03 | [30615](https://github.com/airbytehq/airbyte/pull/30615) | Add new stream `Contributor Activity` | +| 1.0.4 | 2023-08-03 | [29031](https://github.com/airbytehq/airbyte/pull/29031) | Reverted `advancedAuth` spec changes | +| 1.0.3 | 2023-08-01 | [28910](https://github.com/airbytehq/airbyte/pull/28910) | Updated `advancedAuth` broken references | +| 1.0.2 | 2023-07-11 | [28144](https://github.com/airbytehq/airbyte/pull/28144) | Add `archived_at` property to `Organizations` schema parameter | +| 1.0.1 | 2023-05-22 | [25838](https://github.com/airbytehq/airbyte/pull/25838) | Deprecate "page size" input parameter | +| 1.0.0 | 2023-05-19 | [25778](https://github.com/airbytehq/airbyte/pull/25778) | Improve repo(s) name validation on UI | +| 0.5.0 | 2023-05-16 | [25793](https://github.com/airbytehq/airbyte/pull/25793) | Implement client-side 
throttling of requests | +| 0.4.11 | 2023-05-12 | [26025](https://github.com/airbytehq/airbyte/pull/26025) | Added more transparent depiction of the personal access token expired | +| 0.4.10 | 2023-05-15 | [26075](https://github.com/airbytehq/airbyte/pull/26075) | Add more specific error message description for no repos case. | +| 0.4.9 | 2023-05-01 | [24523](https://github.com/airbytehq/airbyte/pull/24523) | Add undeclared columns to spec | +| 0.4.8 | 2023-04-19 | [00000](https://github.com/airbytehq/airbyte/pull/25312) | Fix repo name validation | +| 0.4.7 | 2023-03-24 | [24457](https://github.com/airbytehq/airbyte/pull/24457) | Add validation and transformation for repositories config | +| 0.4.6 | 2023-03-24 | [24398](https://github.com/airbytehq/airbyte/pull/24398) | Fix caching for `get_starting_point` in stream "Commits" | +| 0.4.5 | 2023-03-23 | [24417](https://github.com/airbytehq/airbyte/pull/24417) | Add pattern_descriptors to fields with an expected format | +| 0.4.4 | 2023-03-17 | [24255](https://github.com/airbytehq/airbyte/pull/24255) | Add field groups and titles to improve display of connector setup form | +| 0.4.3 | 2023-03-04 | [22993](https://github.com/airbytehq/airbyte/pull/22993) | Specified date formatting in specification | +| 0.4.2 | 2023-03-03 | [23467](https://github.com/airbytehq/airbyte/pull/23467) | added user friendly messages, added AirbyteTracedException config_error, updated SAT | +| 0.4.1 | 2023-01-27 | [22039](https://github.com/airbytehq/airbyte/pull/22039) | Set `AvailabilityStrategy` for streams explicitly to `None` | +| 0.4.0 | 2023-01-20 | [21457](https://github.com/airbytehq/airbyte/pull/21457) | Use GraphQL for `issue_reactions` stream | +| 0.3.12 | 2023-01-18 | [21481](https://github.com/airbytehq/airbyte/pull/21481) | Handle 502 Bad Gateway error with proper log message | +| 0.3.11 | 2023-01-06 | [21084](https://github.com/airbytehq/airbyte/pull/21084) | Raise Error if no organizations or repos are available during read 
| +| 0.3.10 | 2022-12-15 | [20523](https://github.com/airbytehq/airbyte/pull/20523) | Revert changes from 0.3.9 | +| 0.3.9 | 2022-12-14 | [19978](https://github.com/airbytehq/airbyte/pull/19978) | Update CDK dependency; move custom HTTPError handling into `AvailabilityStrategy` classes | +| 0.3.8 | 2022-11-10 | [19299](https://github.com/airbytehq/airbyte/pull/19299) | Fix events and workflow_runs datetimes | +| 0.3.7 | 2022-10-20 | [18213](https://github.com/airbytehq/airbyte/pull/18213) | Skip retry on HTTP 200 | +| 0.3.6 | 2022-10-11 | [17852](https://github.com/airbytehq/airbyte/pull/17852) | Use default behaviour, retry on 429 and all 5XX errors | +| 0.3.5 | 2022-10-07 | [17715](https://github.com/airbytehq/airbyte/pull/17715) | Improve 502 handling for `comments` stream | +| 0.3.4 | 2022-10-04 | [17555](https://github.com/airbytehq/airbyte/pull/17555) | Skip repository if got HTTP 500 for WorkflowRuns stream | +| 0.3.3 | 2022-09-28 | [17287](https://github.com/airbytehq/airbyte/pull/17287) | Fix problem with "null" `cursor_field` for WorkflowJobs stream | +| 0.3.2 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state. 
| +| 0.3.1 | 2022-09-21 | [16947](https://github.com/airbytehq/airbyte/pull/16947) | Improve error logging when handling HTTP 500 error | +| 0.3.0 | 2022-09-09 | [16534](https://github.com/airbytehq/airbyte/pull/16534) | Add new stream `WorkflowJobs` | +| 0.2.46 | 2022-08-17 | [15730](https://github.com/airbytehq/airbyte/pull/15730) | Validate input organizations and repositories | +| 0.2.45 | 2022-08-11 | [15420](https://github.com/airbytehq/airbyte/pull/15420) | "User" object can be "null" | +| 0.2.44 | 2022-08-01 | [14795](https://github.com/airbytehq/airbyte/pull/14795) | Use GraphQL for `pull_request_comment_reactions` stream | +| 0.2.43 | 2022-07-26 | [15049](https://github.com/airbytehq/airbyte/pull/15049) | Bugfix schemas for streams `deployments`, `workflow_runs`, `teams` | +| 0.2.42 | 2022-07-12 | [14613](https://github.com/airbytehq/airbyte/pull/14613) | Improve schema for stream `pull_request_commits` added "null" | +| 0.2.41 | 2022-07-03 | [14376](https://github.com/airbytehq/airbyte/pull/14376) | Add Retry for GraphQL API Resource limitations | +| 0.2.40 | 2022-07-01 | [14338](https://github.com/airbytehq/airbyte/pull/14338) | Revert: "Rename field `mergeable` to `is_mergeable`" | +| 0.2.39 | 2022-06-30 | [14274](https://github.com/airbytehq/airbyte/pull/14274) | Rename field `mergeable` to `is_mergeable` | +| 0.2.38 | 2022-06-27 | [13989](https://github.com/airbytehq/airbyte/pull/13989) | Use GraphQL for `reviews` stream | +| 0.2.37 | 2022-06-21 | [13955](https://github.com/airbytehq/airbyte/pull/13955) | Fix "secondary rate limit" not retrying | +| 0.2.36 | 2022-06-20 | [13926](https://github.com/airbytehq/airbyte/pull/13926) | Break point added for `workflows_runs` stream | +| 0.2.35 | 2022-06-16 | [13763](https://github.com/airbytehq/airbyte/pull/13763) | Use GraphQL for `pull_request_stats` stream | +| 0.2.34 | 2022-06-14 | [13707](https://github.com/airbytehq/airbyte/pull/13707) | Fix API sorting, fix `get_starting_point` caching | +| 0.2.33 | 
2022-06-08 | [13558](https://github.com/airbytehq/airbyte/pull/13558) | Enable caching only for parent streams | +| 0.2.32 | 2022-06-07 | [13531](https://github.com/airbytehq/airbyte/pull/13531) | Fix different result from `get_starting_point` when reading by pages | +| 0.2.31 | 2022-05-24 | [13115](https://github.com/airbytehq/airbyte/pull/13115) | Add incremental support for streams `WorkflowRuns` | +| 0.2.30 | 2022-05-09 | [12294](https://github.com/airbytehq/airbyte/pull/12294) | Add incremental support for streams `CommitCommentReactions`, `IssueCommentReactions`, `IssueReactions`, `PullRequestCommentReactions`, `Repositories`, `Workflows` | +| 0.2.29 | 2022-05-04 | [12482](https://github.com/airbytehq/airbyte/pull/12482) | Update input configuration copy | +| 0.2.28 | 2022-04-21 | [11893](https://github.com/airbytehq/airbyte/pull/11893) | Add new streams `TeamMembers`, `TeamMemberships` | +| 0.2.27 | 2022-04-02 | [11678](https://github.com/airbytehq/airbyte/pull/11678) | Fix "PAT Credentials" in spec | +| 0.2.26 | 2022-03-31 | [11623](https://github.com/airbytehq/airbyte/pull/11623) | Re-factored incremental sync for `Reviews` stream | +| 0.2.25 | 2022-03-31 | [11567](https://github.com/airbytehq/airbyte/pull/11567) | Improve code for better error handling | +| 0.2.24 | 2022-03-30 | [9251](https://github.com/airbytehq/airbyte/pull/9251) | Add Streams Workflow and WorkflowRuns | +| 0.2.23 | 2022-03-17 | [11212](https://github.com/airbytehq/airbyte/pull/11212) | Improve documentation and spec for Beta | +| 0.2.22 | 2022-03-10 | [10878](https://github.com/airbytehq/airbyte/pull/10878) | Fix error handling for unavailable streams with 404 status code | +| 0.2.21 | 2022-03-04 | [10749](https://github.com/airbytehq/airbyte/pull/10749) | Add new stream `ProjectCards` | +| 0.2.20 | 2022-02-16 | [10385](https://github.com/airbytehq/airbyte/pull/10385) | Add new stream `Deployments`, `ProjectColumns`, `PullRequestCommits` | +| 0.2.19 | 2022-02-07 | 
[10211](https://github.com/airbytehq/airbyte/pull/10211) | Add human-readable error in case of incorrect organization or repo name | +| 0.2.18 | 2021-02-09 | [10193](https://github.com/airbytehq/airbyte/pull/10193) | Add handling secondary rate limits | +| 0.2.17 | 2021-02-02 | [9999](https://github.com/airbytehq/airbyte/pull/9999) | Remove BAD_GATEWAY code from backoff_time | +| 0.2.16 | 2021-02-02 | [9868](https://github.com/airbytehq/airbyte/pull/9868) | Add log message for streams that are restricted for OAuth. Update oauth scopes. | +| 0.2.15 | 2021-01-26 | [9802](https://github.com/airbytehq/airbyte/pull/9802) | Add missing fields for auto_merge in pull request stream | +| 0.2.14 | 2021-01-21 | [9664](https://github.com/airbytehq/airbyte/pull/9664) | Add custom pagination size for large streams | +| 0.2.13 | 2021-01-20 | [9619](https://github.com/airbytehq/airbyte/pull/9619) | Fix logging for function `should_retry` | +| 0.2.11 | 2021-01-17 | [9492](https://github.com/airbytehq/airbyte/pull/9492) | Remove optional parameter `Accept` for reaction`s streams to fix error with 502 HTTP status code in response | +| 0.2.10 | 2021-01-03 | [7250](https://github.com/airbytehq/airbyte/pull/7250) | Use CDK caching and convert PR-related streams to incremental | +| 0.2.9 | 2021-12-29 | [9179](https://github.com/airbytehq/airbyte/pull/9179) | Use default retry delays on server error responses | +| 0.2.8 | 2021-12-07 | [8524](https://github.com/airbytehq/airbyte/pull/8524) | Update connector fields title/description | +| 0.2.7 | 2021-12-06 | [8518](https://github.com/airbytehq/airbyte/pull/8518) | Add connection retry with GitHub | +| 0.2.6 | 2021-11-24 | [8030](https://github.com/airbytehq/airbyte/pull/8030) | Support start date property for PullRequestStats and Reviews streams | +| 0.2.5 | 2021-11-21 | [8170](https://github.com/airbytehq/airbyte/pull/8170) | Fix slow check connection for organizations with a lot of repos | +| 0.2.4 | 2021-11-11 | 
[7856](https://github.com/airbytehq/airbyte/pull/7856) | Resolve $ref fields in some stream schemas | +| 0.2.3 | 2021-10-06 | [6833](https://github.com/airbytehq/airbyte/pull/6833) | Fix config backward compatability | +| 0.2.2 | 2021-10-05 | [6761](https://github.com/airbytehq/airbyte/pull/6761) | Add oauth worflow specification | +| 0.2.1 | 2021-09-22 | [6223](https://github.com/airbytehq/airbyte/pull/6223) | Add option to pull commits from user-specified branches | +| 0.2.0 | 2021-09-19 | [5898](https://github.com/airbytehq/airbyte/pull/5898) and [6227](https://github.com/airbytehq/airbyte/pull/6227) | Don't minimize any output fields & add better error handling | +| 0.1.11 | 2021-09-15 | [5949](https://github.com/airbytehq/airbyte/pull/5949) | Add caching for all streams | +| 0.1.10 | 2021-09-09 | [5860](https://github.com/airbytehq/airbyte/pull/5860) | Add reaction streams | +| 0.1.9 | 2021-09-02 | [5788](https://github.com/airbytehq/airbyte/pull/5788) | Handling empty repository, check method using RepositoryStats stream | +| 0.1.8 | 2021-09-01 | [5757](https://github.com/airbytehq/airbyte/pull/5757) | Add more streams | +| 0.1.7 | 2021-08-27 | [5696](https://github.com/airbytehq/airbyte/pull/5696) | Handle negative backoff values | +| 0.1.6 | 2021-08-18 | [5456](https://github.com/airbytehq/airbyte/pull/5223) | Add MultipleTokenAuthenticator | +| 0.1.5 | 2021-08-18 | [5456](https://github.com/airbytehq/airbyte/pull/5456) | Fix set up validation | +| 0.1.4 | 2021-08-13 | [5136](https://github.com/airbytehq/airbyte/pull/5136) | Support syncing multiple repositories/organizations | +| 0.1.3 | 2021-08-03 | [5156](https://github.com/airbytehq/airbyte/pull/5156) | Extended existing schemas with `users` property for certain streams | +| 0.1.2 | 2021-07-13 | [4708](https://github.com/airbytehq/airbyte/pull/4708) | Fix bug with IssueEvents stream and add handling for rate limiting | +| 0.1.1 | 2021-07-07 | [4590](https://github.com/airbytehq/airbyte/pull/4590) | Fix 
schema in the `pull_request` stream | +| 0.1.0 | 2021-07-06 | [4174](https://github.com/airbytehq/airbyte/pull/4174) | New Source: GitHub | + +
\ No newline at end of file diff --git a/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/incorrect_not_all_structure.md b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/incorrect_not_all_structure.md new file mode 100644 index 000000000000..b041e8cc78d4 --- /dev/null +++ b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/incorrect_not_all_structure.md @@ -0,0 +1,6 @@ +## GitHub + +## Prerequisites + +- Start Date - the start date to replicate your date. + diff --git a/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/invalid_links.md b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/invalid_links.md new file mode 100644 index 000000000000..1efb99d700d7 --- /dev/null +++ b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/invalid_links.md @@ -0,0 +1,305 @@ +# GitHub + + + +This page contains the setup guide and reference information for the [GitHub](https://www.github.com) source connector. + + + +## Prerequisites + +- List of GitHub Repositories (and access for them in case they are private) + + +**For Airbyte Cloud:** + +- OAuth +- Personal Access Token (see [Permissions and scopes](https://docs.airbyte.com/integrations/sources/github#permissions-and-scopes)) + + + +**For Airbyte Open Source:** + +- Personal Access Token (see [Permissions and scopes](https://docs.airbyte.com/integrations/sources/github#permissions-and-scopes)) + + +## Setup guide + +### Step 1: Set up GitHub + +Create a [GitHub Account](https://github.com). + + +**Airbyte Open Source additional setup steps** + +Log into [GitHub](https://github.com) and then generate a [personal access token](https://github.com/settings/tokens-that_do_not_exist). To load balance your API quota consumption across multiple API tokens, input multiple tokens separated with `,`. + + +### Step 2: Set up the GitHub connector in Airbyte + + +**For Airbyte Cloud:** + +1. 
[Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account. +2. In the left navigation bar, click **Sources**. +3. On the source selection page, select **GitHub** from the list of Sources. +4. Add a name for your GitHub connector. +5. To authenticate: + + + - **For Airbyte Cloud:** **Authenticate your GitHub account** to authorize your GitHub account. Airbyte will authenticate the GitHub account you are already logged in to. Please make sure you are logged into the right account. + + + + - **For Airbyte Open Source:** Authenticate with **Personal Access Token**. To generate a personal access token, log into [GitHub](https://github.com) and then generate a [personal access token](https://github.com/settings/tokens). Enter your GitHub personal access token. To load balance your API quota consumption across multiple API tokens, input multiple tokens separated with `,`. + + +6. **GitHub Repositories** - Enter a list of GitHub organizations/repositories, e.g. `airbytehq/airbyte` for single repository, `airbytehq/airbyte airbytehq/another-repo` for multiple repositories. If you want to specify the organization to receive data from all its repositories, then you should specify it according to the following example: `airbytehq/*`. + +:::caution +Repositories with the wrong name or repositories that do not exist or have the wrong name format will be skipped with `WARN` message in the logs. +::: + +7. **Start date (Optional)** - The date from which you'd like to replicate data for streams. For streams which support this configuration, only data generated on or after the start date will be replicated. 
+ +- These streams will only sync records generated on or after the **Start Date**: `comments`, `commit_comment_reactions`, `commit_comments`, `commits`, `deployments`, `events`, `issue_comment_reactions`, `issue_events`, `issue_milestones`, `issue_reactions`, `issues`, `project_cards`, `project_columns`, `projects`, `pull_request_comment_reactions`, `pull_requests`, `pull_requeststats`, `releases`, `review_comments`, `reviews`, `stargazers`, `workflow_runs`, `workflows`. + +- The **Start Date** does not apply to the streams below and all data will be synced for these streams: `assignees`, `branches`, `collaborators`, `issue_labels`, `organizations`, `pull_request_commits`, `pull_request_stats`, `repositories`, `tags`, `teams`, `users` + +8. **Branch (Optional)** - List of GitHub repository branches to pull commits from, e.g. `airbytehq/airbyte/master`. If no branches are specified for a repository, the default branch will be pulled. (e.g. `airbytehq/airbyte/master airbytehq/airbyte/my-branch`). +9. **Max requests per hour (Optional)** - The GitHub API allows for a maximum of 5,000 requests per hour (15,000 for Github Enterprise). You can specify a lower value to limit your use of the API quota. Refer to GitHub article [Rate limits for the REST API](https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api). 
+ + + +## Supported sync modes + +The GitHub source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts/#connection-sync-modes): + +- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/) +- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) +- [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) +- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped) + +## Supported Streams + +This connector outputs the following full refresh streams: + +- [Assignees](https://docs.github.com/en/rest/issues/assignees?apiVersion=2022-11-28#list-assignees) +- [Branches](https://docs.github.com/en/rest/branches/branches?apiVersion=2022-11-28#list-branches) +- [Contributor Activity](https://docs.github.com/en/rest/metrics/statistics?apiVersion=2022-11-28#get-all-contributor-commit-activity) +- [Collaborators](https://docs.github.com/en/rest/collaborators/collaborators?apiVersion=2022-11-28#list-repository-collaborators) +- [Issue labels](https://docs.github.com/en/rest/issues/labels?apiVersion=2022-11-28#list-labels-for-a-repository) +- [Organizations](https://docs.github.com/en/rest/orgs/orgs?apiVersion=2022-11-28#list-organizations) +- [Pull request commits](https://docs.github.com/en/rest/pulls/pulls?apiVersion=2022-11-28#list-commits-on-a-pull-request) +- [Tags](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-repository-tags) +- [TeamMembers](https://docs.github.com/en/rest/teams/members?apiVersion=2022-11-28#list-team-members) +- [TeamMemberships](https://docs.github.com/en/rest/teams/members?apiVersion=2022-11-28#get-team-membership-for-a-user) +- [Teams](https://docs.github.com/en/rest/teams/teams?apiVersion=2022-11-28#list-teams) +- 
[Users](https://docs.github.com/en/rest/orgs/members?apiVersion=2022-11-28#list-organization-members) +- [Issue timeline events](https://docs.github.com/en/rest/issues/timeline?apiVersion=2022-11-28#list-timeline-events-for-an-issue) + +This connector outputs the following incremental streams: + +- [Comments](https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#list-issue-comments-for-a-repository) +- [Commit comment reactions](https://docs.github.com/en/rest/reference/reactions?apiVersion=2022-11-28#list-reactions-for-a-commit-comment) +- [Commit comments](https://docs.github.com/en/rest/commits/comments?apiVersion=2022-11-28#list-commit-comments-for-a-repository) +- [Commits](https://docs.github.com/en/rest/commits/commits?apiVersion=2022-11-28#list-commits) +- [Deployments](https://docs.github.com/en/rest/deployments/deployments?apiVersion=2022-11-28#list-deployments) +- [Events](https://docs.github.com/en/rest/activity/events?apiVersion=2022-11-28#list-repository-events) +- [Issue comment reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-an-issue-comment) +- [Issue events](https://docs.github.com/en/rest/issues/events?apiVersion=2022-11-28#list-issue-events-for-a-repository) +- [Issue milestones](https://docs.github.com/en/rest/issues/milestones?apiVersion=2022-11-28#list-milestones) +- [Issue reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-an-issue) +- [Issues](https://docs.github.com/en/rest/issues/issues?apiVersion=2022-11-28#list-repository-issues) +- [Project (Classic) cards](https://docs.github.com/en/rest/projects/cards?apiVersion=2022-11-28#list-project-cards) +- [Project (Classic) columns](https://docs.github.com/en/rest/projects/columns?apiVersion=2022-11-28#list-project-columns) +- [Projects (Classic)](https://docs.github.com/en/rest/projects/projects?apiVersion=2022-11-28#list-repository-projects) +- 
[ProjectsV2](https://docs.github.com/en/graphql/reference/objects#projectv2) +- [Pull request comment reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-a-pull-request-review-comment) +- [Pull request stats](https://docs.github.com/en/graphql/reference/objects#pullrequest) +- [Pull requests](https://docs.github.com/en/rest/pulls/pulls?apiVersion=2022-11-28#list-pull-requests) +- [Releases](https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#list-releases) +- [Repositories](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-organization-repositories) +- [Review comments](https://docs.github.com/en/rest/pulls/comments?apiVersion=2022-11-28#list-review-comments-in-a-repository) +- [Reviews](https://docs.github.com/en/rest/pulls/reviews?apiVersion=2022-11-28#list-reviews-for-a-pull-request) +- [Stargazers](https://docs.github.com/en/rest/activity/starring?apiVersion=2022-11-28#list-stargazers) +- [WorkflowJobs](https://docs.github.com/pt/rest/actions/workflow-jobs?apiVersion=2022-11-28#list-jobs-for-a-workflow-run) +- [WorkflowRuns](https://docs.github.com/en/rest/actions/workflow-runs?apiVersion=2022-11-28#list-workflow-runs-for-a-repository) +- [Workflows](https://docs.github.com/en/rest/actions/workflows?apiVersion=2022-11-28#list-repository-workflows) + +### Notes + +1. Only 4 streams \(`comments`, `commits`, `issues` and `review comments`\) from the listed above streams are pure incremental meaning that they: + + - read only new records; + - output only new records. + +2. 
Streams `workflow_runs` and `workflow_jobs` are almost pure incremental: + + - read new records and some portion of old records (in the past 30 days) [docs](https://docs.github.com/en/actions/managing-workflow-runs/re-running-workflows-and-jobs); + - the `workflow_jobs` depends on the `workflow_runs` to read the data, so they both follow the same logic [docs](https://docs.github.com/pt/rest/actions/workflow-jobs#list-jobs-for-a-workflow-run); + - output only new records. + +3. Other 19 incremental streams are also incremental but with one difference, they: + + - read all records; + - output only new records. + Please, consider this behaviour when using those 19 incremental streams because it may affect your API call limits. + +4. Sometimes for large streams specifying a very distant `start_date` in the past may result in repeated errors from GitHub instead of records \(a respective `WARN` log message will be output\). In this case, specifying a more recent `start_date` may help. + **The "Start date" configuration option does not apply to the streams below, because the GitHub API does not include dates which can be used for filtering:** + +- `assignees` +- `branches` +- `collaborators` +- `issue_labels` +- `organizations` +- `pull_request_commits` +- `pull_request_stats` +- `repositories` +- `tags` +- `teams` +- `users` + +## Limitations & Troubleshooting + +
+ +Expand to see details about GitHub connector limitations and troubleshooting. + + +### Connector limitations + +#### Rate limiting +The GitHub connector should not run into GitHub API limitations under normal usage. Please [create an issue](https://github.com/airbytehq/airbyte/issues) if you see any rate limit issues that are not automatically retried successfully. Refer to GitHub article [Rate limits for the REST API](https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api). + +#### Permissions and scopes + +If you use OAuth authentication method, the OAuth2.0 application requests the next list of [scopes](https://docs.github.com/en/developers/apps/building-oauth-apps/scopes-for-oauth-apps#available-scopes): **repo**, **read:org**, **read:repo_hook**, **read:user**, **read:discussion**, **workflow**. For [personal access token](https://github.com/settings/tokens) you need to manually select needed scopes. + +Your token should have at least the `repo` scope. Depending on which streams you want to sync, the user generating the token needs more permissions: + +- For syncing Collaborators, the user which generates the personal access token must be a collaborator. To become a collaborator, they must be invited by an owner. If there are no collaborators, no records will be synced. Read more about access permissions [here](https://docs.github.com/en/get-started/learning-about-github/access-permissions-on-github). +- Syncing [Teams](https://docs.github.com/en/organizations/organizing-members-into-teams/about-teams_do_not_exists) is only available to authenticated members of a team's [organization](https://docs.github.com/en/rest/orgs). [Personal user accounts](https://docs.github.com/en/get-started/learning-about-github/types-of-github-accounts) and repositories belonging to them don't have access to Teams features. In this case no records will be synced. +- To sync the Projects stream, the repository must have the Projects feature enabled. 
+ +### Troubleshooting + +* Check out common troubleshooting issues for the GitHub source connector on our [Airbyte Forum](https://github.com/airbytehq/airbyte/discussions) + +
+ +## Changelog + +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 1.5.5 | 2023-12-26 | [33783](https://github.com/airbytehq/airbyte/pull/33783) | Fix retry for 504 error in GraphQL based streams | +| 1.5.4 | 2023-11-20 | [32679](https://github.com/airbytehq/airbyte/pull/32679) | Return AirbyteMessage if max retry exceeded for 202 status code | +| 1.5.3 | 2023-10-23 | [31702](https://github.com/airbytehq/airbyte/pull/31702) | Base image migration: remove Dockerfile and use the python-connector-base image | +| 1.5.2 | 2023-10-13 | [31386](https://github.com/airbytehq/airbyte/pull/31386) | Handle `ContributorActivity` continuous `ACCEPTED` response | +| 1.5.1 | 2023-10-12 | [31307](https://github.com/airbytehq/airbyte/pull/31307) | Increase backoff_time for stream `ContributorActivity` | +| 1.5.0 | 2023-10-11 | [31300](https://github.com/airbytehq/airbyte/pull/31300) | Update Schemas: Add date-time format to fields | +| 1.4.6 | 2023-10-04 | [31056](https://github.com/airbytehq/airbyte/pull/31056) | Migrate spec properties' `repository` and `branch` type to \<array\> | +| 1.4.5 | 2023-10-02 | [31023](https://github.com/airbytehq/airbyte/pull/31023) | Increase backoff for stream `Contributor Activity` | +| 1.4.4 | 2023-10-02 | [30971](https://github.com/airbytehq/airbyte/pull/30971) | Mark `start_date` as optional. 
| +| 1.4.3 | 2023-10-02 | [30979](https://github.com/airbytehq/airbyte/pull/30979) | Fetch archived records in `Project Cards` | +| 1.4.2 | 2023-09-30 | [30927](https://github.com/airbytehq/airbyte/pull/30927) | Provide actionable user error messages | +| 1.4.1 | 2023-09-30 | [30839](https://github.com/airbytehq/airbyte/pull/30839) | Update CDK to Latest version | +| 1.4.0 | 2023-09-29 | [30823](https://github.com/airbytehq/airbyte/pull/30823) | Add new stream `issue Timeline Events` | +| 1.3.1 | 2023-09-28 | [30824](https://github.com/airbytehq/airbyte/pull/30824) | Handle empty response in stream `ContributorActivity` | +| 1.3.0 | 2023-09-25 | [30731](https://github.com/airbytehq/airbyte/pull/30731) | Add new stream `ProjectsV2` | +| 1.2.1 | 2023-09-22 | [30693](https://github.com/airbytehq/airbyte/pull/30693) | Handle 404 error in `TeamMemberShips` | +| 1.2.0 | 2023-09-22 | [30647](https://github.com/airbytehq/airbyte/pull/30647) | Add support for self-hosted GitHub instances | +| 1.1.1 | 2023-09-21 | [30654](https://github.com/airbytehq/airbyte/pull/30654) | Rewrite source connection error messages | +| 1.1.0 | 2023-08-03 | [30615](https://github.com/airbytehq/airbyte/pull/30615) | Add new stream `Contributor Activity` | +| 1.0.4 | 2023-08-03 | [29031](https://github.com/airbytehq/airbyte/pull/29031) | Reverted `advancedAuth` spec changes | +| 1.0.3 | 2023-08-01 | [28910](https://github.com/airbytehq/airbyte/pull/28910) | Updated `advancedAuth` broken references | +| 1.0.2 | 2023-07-11 | [28144](https://github.com/airbytehq/airbyte/pull/28144) | Add `archived_at` property to `Organizations` schema parameter | +| 1.0.1 | 2023-05-22 | [25838](https://github.com/airbytehq/airbyte/pull/25838) | Deprecate "page size" input parameter | +| 1.0.0 | 2023-05-19 | [25778](https://github.com/airbytehq/airbyte/pull/25778) | Improve repo(s) name validation on UI | +| 0.5.0 | 2023-05-16 | [25793](https://github.com/airbytehq/airbyte/pull/25793) | Implement client-side 
throttling of requests | +| 0.4.11 | 2023-05-12 | [26025](https://github.com/airbytehq/airbyte/pull/26025) | Added more transparent depiction of the personal access token expired | +| 0.4.10 | 2023-05-15 | [26075](https://github.com/airbytehq/airbyte/pull/26075) | Add more specific error message description for no repos case. | +| 0.4.9 | 2023-05-01 | [24523](https://github.com/airbytehq/airbyte/pull/24523) | Add undeclared columns to spec | +| 0.4.8 | 2023-04-19 | [00000](https://github.com/airbytehq/airbyte/pull/25312) | Fix repo name validation | +| 0.4.7 | 2023-03-24 | [24457](https://github.com/airbytehq/airbyte/pull/24457) | Add validation and transformation for repositories config | +| 0.4.6 | 2023-03-24 | [24398](https://github.com/airbytehq/airbyte/pull/24398) | Fix caching for `get_starting_point` in stream "Commits" | +| 0.4.5 | 2023-03-23 | [24417](https://github.com/airbytehq/airbyte/pull/24417) | Add pattern_descriptors to fields with an expected format | +| 0.4.4 | 2023-03-17 | [24255](https://github.com/airbytehq/airbyte/pull/24255) | Add field groups and titles to improve display of connector setup form | +| 0.4.3 | 2023-03-04 | [22993](https://github.com/airbytehq/airbyte/pull/22993) | Specified date formatting in specification | +| 0.4.2 | 2023-03-03 | [23467](https://github.com/airbytehq/airbyte/pull/23467) | added user friendly messages, added AirbyteTracedException config_error, updated SAT | +| 0.4.1 | 2023-01-27 | [22039](https://github.com/airbytehq/airbyte/pull/22039) | Set `AvailabilityStrategy` for streams explicitly to `None` | +| 0.4.0 | 2023-01-20 | [21457](https://github.com/airbytehq/airbyte/pull/21457) | Use GraphQL for `issue_reactions` stream | +| 0.3.12 | 2023-01-18 | [21481](https://github.com/airbytehq/airbyte/pull/21481) | Handle 502 Bad Gateway error with proper log message | +| 0.3.11 | 2023-01-06 | [21084](https://github.com/airbytehq/airbyte/pull/21084) | Raise Error if no organizations or repos are available during read 
| +| 0.3.10 | 2022-12-15 | [20523](https://github.com/airbytehq/airbyte/pull/20523) | Revert changes from 0.3.9 | +| 0.3.9 | 2022-12-14 | [19978](https://github.com/airbytehq/airbyte/pull/19978) | Update CDK dependency; move custom HTTPError handling into `AvailabilityStrategy` classes | +| 0.3.8 | 2022-11-10 | [19299](https://github.com/airbytehq/airbyte/pull/19299) | Fix events and workflow_runs datetimes | +| 0.3.7 | 2022-10-20 | [18213](https://github.com/airbytehq/airbyte/pull/18213) | Skip retry on HTTP 200 | +| 0.3.6 | 2022-10-11 | [17852](https://github.com/airbytehq/airbyte/pull/17852) | Use default behaviour, retry on 429 and all 5XX errors | +| 0.3.5 | 2022-10-07 | [17715](https://github.com/airbytehq/airbyte/pull/17715) | Improve 502 handling for `comments` stream | +| 0.3.4 | 2022-10-04 | [17555](https://github.com/airbytehq/airbyte/pull/17555) | Skip repository if got HTTP 500 for WorkflowRuns stream | +| 0.3.3 | 2022-09-28 | [17287](https://github.com/airbytehq/airbyte/pull/17287) | Fix problem with "null" `cursor_field` for WorkflowJobs stream | +| 0.3.2 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state. 
| +| 0.3.1 | 2022-09-21 | [16947](https://github.com/airbytehq/airbyte/pull/16947) | Improve error logging when handling HTTP 500 error | +| 0.3.0 | 2022-09-09 | [16534](https://github.com/airbytehq/airbyte/pull/16534) | Add new stream `WorkflowJobs` | +| 0.2.46 | 2022-08-17 | [15730](https://github.com/airbytehq/airbyte/pull/15730) | Validate input organizations and repositories | +| 0.2.45 | 2022-08-11 | [15420](https://github.com/airbytehq/airbyte/pull/15420) | "User" object can be "null" | +| 0.2.44 | 2022-08-01 | [14795](https://github.com/airbytehq/airbyte/pull/14795) | Use GraphQL for `pull_request_comment_reactions` stream | +| 0.2.43 | 2022-07-26 | [15049](https://github.com/airbytehq/airbyte/pull/15049) | Bugfix schemas for streams `deployments`, `workflow_runs`, `teams` | +| 0.2.42 | 2022-07-12 | [14613](https://github.com/airbytehq/airbyte/pull/14613) | Improve schema for stream `pull_request_commits` added "null" | +| 0.2.41 | 2022-07-03 | [14376](https://github.com/airbytehq/airbyte/pull/14376) | Add Retry for GraphQL API Resource limitations | +| 0.2.40 | 2022-07-01 | [14338](https://github.com/airbytehq/airbyte/pull/14338) | Revert: "Rename field `mergeable` to `is_mergeable`" | +| 0.2.39 | 2022-06-30 | [14274](https://github.com/airbytehq/airbyte/pull/14274) | Rename field `mergeable` to `is_mergeable` | +| 0.2.38 | 2022-06-27 | [13989](https://github.com/airbytehq/airbyte/pull/13989) | Use GraphQL for `reviews` stream | +| 0.2.37 | 2022-06-21 | [13955](https://github.com/airbytehq/airbyte/pull/13955) | Fix "secondary rate limit" not retrying | +| 0.2.36 | 2022-06-20 | [13926](https://github.com/airbytehq/airbyte/pull/13926) | Break point added for `workflows_runs` stream | +| 0.2.35 | 2022-06-16 | [13763](https://github.com/airbytehq/airbyte/pull/13763) | Use GraphQL for `pull_request_stats` stream | +| 0.2.34 | 2022-06-14 | [13707](https://github.com/airbytehq/airbyte/pull/13707) | Fix API sorting, fix `get_starting_point` caching | +| 0.2.33 | 
2022-06-08 | [13558](https://github.com/airbytehq/airbyte/pull/13558) | Enable caching only for parent streams | +| 0.2.32 | 2022-06-07 | [13531](https://github.com/airbytehq/airbyte/pull/13531) | Fix different result from `get_starting_point` when reading by pages | +| 0.2.31 | 2022-05-24 | [13115](https://github.com/airbytehq/airbyte/pull/13115) | Add incremental support for streams `WorkflowRuns` | +| 0.2.30 | 2022-05-09 | [12294](https://github.com/airbytehq/airbyte/pull/12294) | Add incremental support for streams `CommitCommentReactions`, `IssueCommentReactions`, `IssueReactions`, `PullRequestCommentReactions`, `Repositories`, `Workflows` | +| 0.2.29 | 2022-05-04 | [12482](https://github.com/airbytehq/airbyte/pull/12482) | Update input configuration copy | +| 0.2.28 | 2022-04-21 | [11893](https://github.com/airbytehq/airbyte/pull/11893) | Add new streams `TeamMembers`, `TeamMemberships` | +| 0.2.27 | 2022-04-02 | [11678](https://github.com/airbytehq/airbyte/pull/11678) | Fix "PAT Credentials" in spec | +| 0.2.26 | 2022-03-31 | [11623](https://github.com/airbytehq/airbyte/pull/11623) | Re-factored incremental sync for `Reviews` stream | +| 0.2.25 | 2022-03-31 | [11567](https://github.com/airbytehq/airbyte/pull/11567) | Improve code for better error handling | +| 0.2.24 | 2022-03-30 | [9251](https://github.com/airbytehq/airbyte/pull/9251) | Add Streams Workflow and WorkflowRuns | +| 0.2.23 | 2022-03-17 | [11212](https://github.com/airbytehq/airbyte/pull/11212) | Improve documentation and spec for Beta | +| 0.2.22 | 2022-03-10 | [10878](https://github.com/airbytehq/airbyte/pull/10878) | Fix error handling for unavailable streams with 404 status code | +| 0.2.21 | 2022-03-04 | [10749](https://github.com/airbytehq/airbyte/pull/10749) | Add new stream `ProjectCards` | +| 0.2.20 | 2022-02-16 | [10385](https://github.com/airbytehq/airbyte/pull/10385) | Add new stream `Deployments`, `ProjectColumns`, `PullRequestCommits` | +| 0.2.19 | 2022-02-07 | 
[10211](https://github.com/airbytehq/airbyte/pull/10211) | Add human-readable error in case of incorrect organization or repo name | +| 0.2.18 | 2021-02-09 | [10193](https://github.com/airbytehq/airbyte/pull/10193) | Add handling secondary rate limits | +| 0.2.17 | 2021-02-02 | [9999](https://github.com/airbytehq/airbyte/pull/9999) | Remove BAD_GATEWAY code from backoff_time | +| 0.2.16 | 2021-02-02 | [9868](https://github.com/airbytehq/airbyte/pull/9868) | Add log message for streams that are restricted for OAuth. Update oauth scopes. | +| 0.2.15 | 2021-01-26 | [9802](https://github.com/airbytehq/airbyte/pull/9802) | Add missing fields for auto_merge in pull request stream | +| 0.2.14 | 2021-01-21 | [9664](https://github.com/airbytehq/airbyte/pull/9664) | Add custom pagination size for large streams | +| 0.2.13 | 2021-01-20 | [9619](https://github.com/airbytehq/airbyte/pull/9619) | Fix logging for function `should_retry` | +| 0.2.11 | 2021-01-17 | [9492](https://github.com/airbytehq/airbyte/pull/9492) | Remove optional parameter `Accept` for reaction`s streams to fix error with 502 HTTP status code in response | +| 0.2.10 | 2021-01-03 | [7250](https://github.com/airbytehq/airbyte/pull/7250) | Use CDK caching and convert PR-related streams to incremental | +| 0.2.9 | 2021-12-29 | [9179](https://github.com/airbytehq/airbyte/pull/9179) | Use default retry delays on server error responses | +| 0.2.8 | 2021-12-07 | [8524](https://github.com/airbytehq/airbyte/pull/8524) | Update connector fields title/description | +| 0.2.7 | 2021-12-06 | [8518](https://github.com/airbytehq/airbyte/pull/8518) | Add connection retry with GitHub | +| 0.2.6 | 2021-11-24 | [8030](https://github.com/airbytehq/airbyte/pull/8030) | Support start date property for PullRequestStats and Reviews streams | +| 0.2.5 | 2021-11-21 | [8170](https://github.com/airbytehq/airbyte/pull/8170) | Fix slow check connection for organizations with a lot of repos | +| 0.2.4 | 2021-11-11 | 
[7856](https://github.com/airbytehq/airbyte/pull/7856) | Resolve $ref fields in some stream schemas | +| 0.2.3 | 2021-10-06 | [6833](https://github.com/airbytehq/airbyte/pull/6833) | Fix config backward compatibility | +| 0.2.2 | 2021-10-05 | [6761](https://github.com/airbytehq/airbyte/pull/6761) | Add oauth workflow specification | +| 0.2.1 | 2021-09-22 | [6223](https://github.com/airbytehq/airbyte/pull/6223) | Add option to pull commits from user-specified branches | +| 0.2.0 | 2021-09-19 | [5898](https://github.com/airbytehq/airbyte/pull/5898) and [6227](https://github.com/airbytehq/airbyte/pull/6227) | Don't minimize any output fields & add better error handling | +| 0.1.11 | 2021-09-15 | [5949](https://github.com/airbytehq/airbyte/pull/5949) | Add caching for all streams | +| 0.1.10 | 2021-09-09 | [5860](https://github.com/airbytehq/airbyte/pull/5860) | Add reaction streams | +| 0.1.9 | 2021-09-02 | [5788](https://github.com/airbytehq/airbyte/pull/5788) | Handling empty repository, check method using RepositoryStats stream | +| 0.1.8 | 2021-09-01 | [5757](https://github.com/airbytehq/airbyte/pull/5757) | Add more streams | +| 0.1.7 | 2021-08-27 | [5696](https://github.com/airbytehq/airbyte/pull/5696) | Handle negative backoff values | +| 0.1.6 | 2021-08-18 | [5456](https://github.com/airbytehq/airbyte/pull/5223) | Add MultipleTokenAuthenticator | +| 0.1.5 | 2021-08-18 | [5456](https://github.com/airbytehq/airbyte/pull/5456) | Fix set up validation | +| 0.1.4 | 2021-08-13 | [5136](https://github.com/airbytehq/airbyte/pull/5136) | Support syncing multiple repositories/organizations | +| 0.1.3 | 2021-08-03 | [5156](https://github.com/airbytehq/airbyte/pull/5156) | Extended existing schemas with `users` property for certain streams | +| 0.1.2 | 2021-07-13 | [4708](https://github.com/airbytehq/airbyte/pull/4708) | Fix bug with IssueEvents stream and add handling for rate limiting | +| 0.1.1 | 2021-07-07 | [4590](https://github.com/airbytehq/airbyte/pull/4590) | Fix 
schema in the `pull_request` stream | +| 0.1.0 | 2021-07-06 | [4174](https://github.com/airbytehq/airbyte/pull/4174) | New Source: GitHub | + +
\ No newline at end of file diff --git a/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/with_not_required_steps.md b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/with_not_required_steps.md new file mode 100644 index 000000000000..942837b08a73 --- /dev/null +++ b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/with_not_required_steps.md @@ -0,0 +1,123 @@ +# Oracle Netsuite + +One unified business management suite, encompassing ERP/Financials, CRM and ecommerce for more than 31,000 customers. + +This connector implements the [SuiteTalk REST Web Services](https://docs.oracle.com/en/cloud/saas/netsuite/ns-online-help/chapter_1540391670.html) and uses REST API to fetch the customers data. + +## Prerequisites +* Oracle NetSuite [account](https://system.netsuite.com/pages/customerlogin.jsp?country=US) +* Allowed access to all Account permissions options + +## Setup guide +### Step 1: Create Oracle NetSuite account + +1. Create [account](https://system.netsuite.com/pages/customerlogin.jsp?country=US) on Oracle NetSuite +2. Confirm your Email + +### Step 2: Setup NetSuite account +#### Step 2.1: Obtain Realm info +1. Login into your NetSuite [account](https://system.netsuite.com/pages/customerlogin.jsp?country=US) +2. Go to **Setup** » **Company** » **Company Information** +3. Copy your Account ID (Realm). It should look like **1234567** for the `Production` env. or **1234567_SB2** - for a `Sandbox` +#### Step 2.2: Enable features +1. Go to **Setup** » **Company** » **Enable Features** +2. Click on **SuiteCloud** tab +3. Scroll down to **SuiteScript** section +4. Enable checkbox for `CLIENT SUITESCRIPT` and `SERVER SUITESCRIPT` +5. Scroll down to **Manage Authentication** section +6. Enable checkbox `TOKEN-BASED AUTHENTICATION` +7. Scroll down to **SuiteTalk (Web Services)** +8. Enable checkbox `REST WEB SERVISES` +9. 
Save the changes +#### Step 2.3: Create Integration (obtain Consumer Key and Consumer Secret) +1. Go to **Setup** » **Integration** » **Manage Integrations** » **New** +2. Fill the **Name** field (we recommend to put `airbyte-rest-integration` for a name) +3. Make sure the **State** is `enabled` +4. Enable checkbox `Token-Based Authentication` in **Authentication** section +5. Save changes +6. After that, **Consumer Key** and **Consumer Secret** will be shown once (copy them to a safe place) +#### Step 2.4: Setup Role +1. Go to **Setup** » **Users/Roles** » **Manage Roles** » **New** +2. Fill the **Name** field (we recommend to put `airbyte-integration-role` for a name) +3. Scroll down to **Permissions** tab +4. (REQUIRED) Click on `Transactions` and manually `add` all the dropdown entities with either `full` or `view` access level. +5. (REQUIRED) Click on `Reports` and manually `add` all the dropdown entities with either `full` or `view` access level. +6. (REQUIRED) Click on `Lists` and manually `add` all the dropdown entities with either `full` or `view` access level. +7. (REQUIRED) Click on `Setup` and manually `add` all the dropdown entities with either `full` or `view` access level. +* Make sure you've done all `REQUIRED` steps correctly, to avoid sync issues in the future. +* Please edit these params again when you `rename` or `customise` any `Object` in Netsuite for `airbyte-integration-role` to reflect such changes. + +#### Step 2.5: Setup User +1. Go to **Setup** » **Users/Roles** » **Manage Users** +2. In column `Name` click on the user’s name you want to give access to the `airbyte-integration-role` +3. Then click on **Edit** button under the user’s name +4. Scroll down to **Access** tab at the bottom +5. Select from dropdown list the `airbyte-integration-role` role which you created in step 2.4 +6. Save changes + +#### Step 2.6: Create Access Token for role +1. Go to **Setup** » **Users/Roles** » **Access Tokens** » **New** +2. 
Select an **Application Name** +3. Under **User** select the user to whom you assigned the `airbyte-integration-role` in step **2.5** +4. Inside **Role** select the one you gave to the user in the step **2.5** +5. Under **Token Name** you can give a descriptive name to the Token you are creating (we recommend to put `airbyte-rest-integration-token` for a name) +6. Save changes +7. After that, **Token ID** and **Token Secret** will be shown once (copy them to a safe place) + +#### Step 2.7: Summary +You have copied next parameters +* Realm (Account ID) +* Consumer Key +* Consumer Secret +* Token ID +* Token Secret +Also you have properly **Configured Account** with **Correct Permissions** and **Access Token** for User and Role you've created earlier. + +### Step 3: Set up the source connector in Airbyte +### For Airbyte Cloud: + +1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account. +2. In the left navigation bar, click **Sources**. In the top-right corner, click **+ new source**. +3. On the source setup page, select **NetSuite** from the Source type dropdown and enter a name for this connector. +4. Add **Realm** +5. Add **Consumer Key** +6. Add **Consumer Secret** +7. Add **Token ID** +8. Add **Token Secret** +9. Click `Set up source`. + +### For Airbyte OSS: + +1. Go to local Airbyte page. +2. In the left navigation bar, click **Sources**. In the top-right corner, click **+ new source**. +3. On the source setup page, select **NetSuite** from the Source type dropdown and enter a name for this connector. +4. Add **Realm** +5. Add **Consumer Key** +6. Add **Consumer Secret** +7. Add **Token ID** +8. Add **Token Secret** +9. 
Click `Set up source` + + +## Supported sync modes + +The NetSuite source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): + - Full Refresh + - Incremental + +## Supported Streams + +- Streams are generated based on `ROLE` and `USER` access to them as well as `Account` settings, make sure you're using the correct role assigned in our case `airbyte-integration-role` or any other custom `ROLE` granted to the Access Token, having the access to the NetSuite objects for data sync, please refer to the **Setup guide** > **Step 2.4** and **Setup guide** > **Step 2.5** + + +## Performance considerations + +The connector is restricted by Netsuite [Concurrency Limit per Integration](https://docs.oracle.com/en/cloud/saas/netsuite/ns-online-help/bridgehead_156224824287.html). + +## Changelog + +| Version | Date | Pull Request | Subject | +| :------ | :--------- | :------------------------------------------------------- | :-------------------------- | +| 0.1.3 | 2023-01-20 | [21645](https://github.com/airbytehq/airbyte/pull/21645) | Minor issues fix, Setup Guide corrections for public docs | +| 0.1.1 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state | +| 0.1.0 | 2022-09-15 | [16093](https://github.com/airbytehq/airbyte/pull/16093) | Initial Alpha release | diff --git a/airbyte-integrations/bases/connector-acceptance-test/unit_tests/test_documentation.py b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/test_documentation.py new file mode 100644 index 000000000000..5a602e85a2e7 --- /dev/null +++ b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/test_documentation.py @@ -0,0 +1,155 @@ +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+
+from pathlib import Path
+
+import pytest
+from airbyte_protocol.models import ConnectorSpecification
+from connector_acceptance_test import conftest
+from connector_acceptance_test.tests.test_core import TestConnectorDocumentation as _TestConnectorDocumentation
+
+
+@pytest.mark.parametrize(
+    "connector_spec, docs_path, should_fail",
+    (
+        # SUCCESS: required field from spec exists in Prerequisites section
+        (
+            {"required": ["start_date"], "properties": {"start_date": {"title": "Start Date"}}},
+            "data/docs/incorrect_not_all_structure.md",
+            False,
+        ),
+        # FAIL: required field from spec does not exist in Prerequisites section
+        (
+            {"required": ["access_token"], "properties": {"access_token": {"title": "Access Token"}}},
+            "data/docs/incorrect_not_all_structure.md",
+            True,
+        ),
+    )
+)
+def test_documentation_prerequisites_section(connector_spec, docs_path, should_fail):
+    """Check that `test_prerequisites_content` raises AssertionError only when a required spec field is missing from the docs."""
+    t = _TestConnectorDocumentation()
+    docs_path = Path(__file__).parent / docs_path
+    documentation = docs_path.read_text(encoding="utf-8").rstrip()
+
+    if should_fail:
+        with pytest.raises(AssertionError):
+            t.test_prerequisites_content(True, ConnectorSpecification(connectionSpecification=connector_spec), documentation, docs_path)
+    else:
+        t.test_prerequisites_content(True, ConnectorSpecification(connectionSpecification=connector_spec), documentation, docs_path)
+
+
+@pytest.mark.parametrize(
+    "metadata, docs_path, should_fail, failure",
+    (
+        # FAIL: Docs does not have required headers from standard template
+        (
+            {"data": {"name": "GitHub"}},
+            "data/docs/incorrect_not_all_structure.md",
+            True,
+            "Missing headers:",
+        ),
+        # FAIL: Docs contain a heading that does not match the standard template
+        (
+            {"data": {"name": "Oracle Netsuite"}},
+            "data/docs/with_not_required_steps.md",
+            True,
+            "Actual Heading: 'Create Oracle NetSuite account'. Possible correct heading",
+        ),
+        # SUCCESS: Docs follow standard template
+        (
+            {"data": {"name": "GitHub"}},
+            "data/docs/correct.md",
+            False,
+            "",
+        ),
+        # FAIL: Incorrect header order
+        (
+            {"data": {"name": "GitHub"}},
+            "data/docs/incorrect_header_order.md",
+            True,
+            "Actual Heading: 'Prerequisites'. Expected Heading: 'GitHub'",
+        ),
+    )
+)
+def test_docs_structure_is_correct(mocker, metadata, docs_path, should_fail, failure):
+    """Check that `test_docs_structure` passes for a conformant doc and otherwise fails with a message matching `failure`."""
+    t = _TestConnectorDocumentation()
+    docs_path = Path(__file__).parent / docs_path
+    documentation = docs_path.read_text(encoding="utf-8").rstrip()
+
+    if should_fail:
+        # NOTE(review): BaseException (not Exception) is presumably needed because the suite fails via pytest.fail() - confirm.
+        with pytest.raises(BaseException) as e:
+            t.test_docs_structure(True, documentation, metadata)
+        assert e.match(failure)
+    else:
+        t.test_docs_structure(True, documentation, metadata)
+
+
+@pytest.mark.parametrize(
+    "metadata, docs_path, should_fail",
+    (
+        # FAIL: Prerequisites section does not follow standard template
+        (
+            {"data": {"name": "GitHub"}},
+            "data/docs/incorrect_not_all_structure.md",
+            True,
+        ),
+        # SUCCESS: Section descriptions follow standard template
+        (
+            {"data": {"name": "GitHub"}},
+            "data/docs/correct.md",
+            False,
+        ),
+        # SUCCESS: Section descriptions follow standard template
+        (
+            {"data": {"name": "GitHub"}},
+            "data/docs/correct_all_description_exist.md",
+            False,
+        ),
+    )
+)
+def test_docs_description(mocker, metadata, docs_path, should_fail):
+    """Check that `test_docs_descriptions` raises AssertionError only when section descriptions deviate from the template."""
+    # Replace pytest.fail with a mock (conftest.pytest is the pytest module object),
+    # presumably so that only assertion failures surface in this test - TODO confirm.
+    mocker.patch.object(conftest.pytest, "fail")
+    t = _TestConnectorDocumentation()
+    docs_path = Path(__file__).parent / docs_path
+    documentation = docs_path.read_text(encoding="utf-8").rstrip()
+
+    if should_fail:
+        with pytest.raises(AssertionError):
+            t.test_docs_descriptions(True, docs_path, documentation, metadata)
+    else:
+        t.test_docs_descriptions(True, docs_path, documentation, metadata)
+
+
+@pytest.mark.parametrize(
+    ("docs_path", "should_fail"),
+    (
+        (
+            "data/docs/correct_all_description_exist.md",
+            False,
+        ),
+        (
+            "data/docs/invalid_links.md",
+            True,
+        ),
+        (
+            "data/docs/correct.md",
+            False,
+        ),
+    )
+)
+def test_docs_urls(docs_path, should_fail):
+    """Check that `test_validate_links` raises AssertionError only for the fixture containing invalid links."""
+    t = _TestConnectorDocumentation()
+    docs_path = Path(__file__).parent / docs_path
+    documentation = docs_path.read_text(encoding="utf-8").rstrip()
+
+    if should_fail:
+        with pytest.raises(AssertionError):
+            t.test_validate_links(True, documentation)
+    else:
+        t.test_validate_links(True, documentation)
diff --git a/docs/connector-development/testing-connectors/connector-acceptance-tests-reference.md b/docs/connector-development/testing-connectors/connector-acceptance-tests-reference.md index e66c4cecd927..5c38d69cc276 100644 --- a/docs/connector-development/testing-connectors/connector-acceptance-tests-reference.md +++ b/docs/connector-development/testing-connectors/connector-acceptance-tests-reference.md @@ -303,6 +303,16 @@ Some examples of the types of tests covered are verification that streams define | `allowed_hosts.bypass_reason` | object with `bypass_reason` | None | Defines the `bypass_reason` description about why the `allowedHosts` check for the certified connector should be skipped | | `suggested_streams.bypass_reason` | object with `bypass_reason` | None | Defines the `bypass_reason` description about why the `suggestedStreams` check for the certified connector should be skipped | +## Test Connector Documentation + +Verifies that connectors documentation follows our standard template, does have correct order of headings, +does not have missing headings and all required fields in Prerequisites section. 
+ +| Input | Type | Default | Note | +|:------------------|:-------|:----------------------|:-------------------------------------------------------------------| +| `config_path` | string | `secrets/config.json` | Path to a JSON object representing a valid connector configuration | +| `timeout_seconds` | int | 20\*60 | Test execution timeout in seconds | + ## Strictness level To enforce maximal coverage of acceptances tests we expose a `test_strictness_level` field at the root of the `acceptance-test-config.yml` configuration.