diff --git a/airbyte-integrations/bases/connector-acceptance-test/CHANGELOG.md b/airbyte-integrations/bases/connector-acceptance-test/CHANGELOG.md
index b7e87e84ed86..a41617cf130f 100644
--- a/airbyte-integrations/bases/connector-acceptance-test/CHANGELOG.md
+++ b/airbyte-integrations/bases/connector-acceptance-test/CHANGELOG.md
@@ -1,5 +1,8 @@
# Changelog
+## 3.4.0
+Add `TestConnectorDocumentation` suite for validating the structure and content of connector documentation.
+
## 3.3.3
Аix `NoAdditionalPropertiesValidator` if no type found in `items`
diff --git a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/config.py b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/config.py
index f269fe15db0a..fbb823185b52 100644
--- a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/config.py
+++ b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/config.py
@@ -259,6 +259,11 @@ class ConnectorAttributesConfig(BaseConfig):
)
+class TestConnectorDocumentationConfig(BaseConfig):
+ # Per-test configuration model for the connector documentation suite; it is the
+ # type parameter of the `connector_documentation` entry in AcceptanceTestConfigurations.
+ # Both defaults reuse module-level names (`timeout_seconds`, `config_path`) that are
+ # defined elsewhere in config.py, outside the lines shown in this hunk.
+ timeout_seconds: int = timeout_seconds
+ config_path: str = config_path
+
+
class GenericTestConfig(GenericModel, Generic[TestConfigT]):
bypass_reason: Optional[str]
tests: Optional[List[TestConfigT]]
@@ -278,6 +283,7 @@ class AcceptanceTestConfigurations(BaseConfig):
full_refresh: Optional[GenericTestConfig[FullRefreshConfig]]
incremental: Optional[GenericTestConfig[IncrementalConfig]]
connector_attributes: Optional[GenericTestConfig[ConnectorAttributesConfig]]
+ connector_documentation: Optional[GenericTestConfig[TestConnectorDocumentationConfig]]
class Config(BaseConfig):
diff --git a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/conftest.py b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/conftest.py
index f0df880650fa..dd3f6b5701c5 100644
--- a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/conftest.py
+++ b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/conftest.py
@@ -395,3 +395,16 @@ def pytest_sessionfinish(session, exitstatus):
@pytest.fixture(name="connector_metadata")
def connector_metadata_fixture(base_path) -> dict:
return load_yaml_or_json_path(base_path / "metadata.yaml")
+
+
+@pytest.fixture(name="docs_path")
+def docs_path_fixture(base_path, connector_metadata) -> Path:
+    """Return the path of the markdown file that documents the connector under test.
+
+    The file location is derived from `data.documentationUrl` in the connector metadata:
+    the `https://docs.airbyte.com` prefix is replaced by `docs` and `.md` is appended.
+    The result is resolved against the seventh ancestor of `base_path`
+    (presumably the repository root -- confirm against the directory layout used at run time).
+    """
+    documentation_url = connector_metadata["data"]["documentationUrl"]
+    relative_docs_file = documentation_url.replace("https://docs.airbyte.com", "docs") + ".md"
+    root_directory = Path(base_path).parents[6]
+    return root_directory / relative_docs_file
+
+
+@pytest.fixture(name="connector_documentation")
+def connector_documentation_fixture(docs_path: Path) -> str:
+    """Return the connector documentation as text, with trailing whitespace removed.
+
+    Args:
+        docs_path: location of the markdown file, as provided by the `docs_path` fixture.
+
+    Raises:
+        OSError: if the documentation file cannot be opened.
+    """
+    # Decode explicitly as UTF-8 so the result does not depend on the locale of the test runner.
+    with open(docs_path, "r", encoding="utf-8") as f:
+        return f.read().rstrip()
diff --git a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/__init__.py b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/__init__.py
index be9bde14f32c..5236bba39b4b 100644
--- a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/__init__.py
+++ b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/__init__.py
@@ -2,8 +2,8 @@
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
#
-from .test_core import TestBasicRead, TestConnection, TestConnectorAttributes, TestDiscovery, TestSpec
+from .test_core import TestBasicRead, TestConnection, TestConnectorAttributes, TestDiscovery, TestSpec, TestConnectorDocumentation
from .test_full_refresh import TestFullRefresh
from .test_incremental import TestIncremental
-__all__ = ["TestSpec", "TestBasicRead", "TestConnection", "TestConnectorAttributes", "TestDiscovery", "TestFullRefresh", "TestIncremental"]
+__all__ = ["TestSpec", "TestBasicRead", "TestConnection", "TestConnectorAttributes", "TestDiscovery", "TestFullRefresh", "TestIncremental", "TestConnectorDocumentation"]
diff --git a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/for_airbyte_cloud.txt b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/for_airbyte_cloud.txt
new file mode 100644
index 000000000000..54946b70acdc
--- /dev/null
+++ b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/for_airbyte_cloud.txt
@@ -0,0 +1,5 @@
+
+1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account.
+2. Click Sources and then click + New source/destination.
+3. On the Set up the source page, select {connector_name} from the Source type dropdown.
+4. Enter a name for the {connector_name} connector.
diff --git a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/for_airbyte_open_source.txt b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/for_airbyte_open_source.txt
new file mode 100644
index 000000000000..c5249d21023a
--- /dev/null
+++ b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/for_airbyte_open_source.txt
@@ -0,0 +1,2 @@
+
+1. Navigate to the Airbyte Open Source dashboard.
diff --git a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/source.txt b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/source.txt
new file mode 100644
index 000000000000..d7b36f402209
--- /dev/null
+++ b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/source.txt
@@ -0,0 +1,6 @@
+
+
+
+This page contains the setup guide and reference information for the [{connector_name}]({docs_link}) source connector.
+
+
diff --git a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/supported_sync_modes.txt b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/supported_sync_modes.txt
new file mode 100644
index 000000000000..51e8f806ff65
--- /dev/null
+++ b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/supported_sync_modes.txt
@@ -0,0 +1,2 @@
+
+The {connector_name} source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts/#connection-sync-modes):
diff --git a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/tutorials.txt b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/tutorials.txt
new file mode 100644
index 000000000000..584c04f8daf5
--- /dev/null
+++ b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/doc_templates/tutorials.txt
@@ -0,0 +1,2 @@
+
+Now that you have set up the {connector_name} source connector, check out the following {connector_name} tutorials:
diff --git a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/test_core.py b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/test_core.py
index dab1ba148b9c..a91617944a79 100644
--- a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/test_core.py
+++ b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/tests/test_core.py
@@ -10,12 +10,16 @@
from functools import reduce
from logging import Logger
from os.path import splitext
+from pathlib import Path
+from threading import Thread
from typing import Any, Dict, List, Mapping, MutableMapping, Optional, Set, Tuple
from xmlrpc.client import Boolean
+import connector_acceptance_test.utils.docs as docs_utils
import dpath.util
import jsonschema
import pytest
+import requests
from airbyte_protocol.models import (
AirbyteRecordMessage,
AirbyteStream,
@@ -1335,3 +1339,192 @@ async def test_certified_connector_has_suggested_streams(
assert (
has_assigned_suggested_streams
), f"The `streams` empty list is not allowed for `metadata.data.suggestedStreams` for certified connectors."
+
+
+class TestConnectorDocumentation(BaseTest):
+    """Validate a connector's documentation page against the standard documentation template.
+
+    Every test requests the `operational_certification_test` fixture, so the whole suite is
+    skipped for connectors whose `ab_internal.ql` metadata value is missing or below 400.
+    """
+
+    MANDATORY_FOR_TEST_STRICTNESS_LEVELS = []  # Used so that this is not part of the mandatory high strictness test suite yet
+
+    PREREQUISITES = "Prerequisites"
+    HEADING = "heading"
+    CREDENTIALS_KEYWORDS = ["account", "auth", "credentials", "access"]
+    CONNECTOR_SPECIFIC_HEADINGS = ""
+
+    @pytest.fixture(name="operational_certification_test")
+    async def operational_certification_test_fixture(self, connector_metadata: dict) -> bool:
+        """
+        Fixture that is used to skip a test that is reserved only for connectors that are supposed to be tested
+        against operational certification criteria.
+
+        Returns True when the test should run; otherwise the requesting test is skipped.
+        """
+        quality_level = connector_metadata.get("data", {}).get("ab_internal", {}).get("ql")
+        # A missing `ql` cannot be compared with an int (TypeError); treat it like a low ql and skip.
+        if quality_level is None or quality_level < 400:
+            pytest.skip("Skipping testing source connector documentation due to low ql.")
+        return True
+
+    def _get_template_headings(self, connector_name: str) -> tuple[tuple[str, ...], tuple[str, ...]]:
+        """
+        https://hackmd.io/Bz75cgATSbm7DjrAqgl4rw - standard template
+
+        Returns a pair: all template headings in the order they must appear in the docs,
+        and the subset of headings that a documentation page is allowed to omit.
+        """
+        all_headings = (
+            connector_name,
+            "Prerequisites",
+            "Setup guide",
+            f"Set up {connector_name}",
+            "For Airbyte Cloud:",
+            "For Airbyte Open Source:",
+            f"Set up the {connector_name} connector in Airbyte",
+            "For Airbyte Cloud:",
+            "For Airbyte Open Source:",
+            "Supported sync modes",
+            "Supported Streams",
+            self.CONNECTOR_SPECIFIC_HEADINGS,
+            "Performance considerations",
+            "Data type map",
+            "Troubleshooting",
+            "Tutorials",
+            "Changelog",
+        )
+        not_required_heading = (
+            f"Set up the {connector_name} connector in Airbyte",
+            "For Airbyte Cloud:",
+            "For Airbyte Open Source:",
+            self.CONNECTOR_SPECIFIC_HEADINGS,
+            "Performance considerations",
+            "Data type map",
+            "Troubleshooting",
+            "Tutorials",
+        )
+        return all_headings, not_required_heading
+
+    def _headings_description(self, connector_name: str) -> dict[str, Path]:
+        """
+        Map each heading that has a templated description to the file holding that template.
+        """
+        templates_dir = Path(__file__).parent / "doc_templates"
+        return {
+            connector_name: templates_dir / "source.txt",
+            "For Airbyte Cloud:": templates_dir / "for_airbyte_cloud.txt",
+            "For Airbyte Open Source:": templates_dir / "for_airbyte_open_source.txt",
+            "Supported sync modes": templates_dir / "supported_sync_modes.txt",
+            "Tutorials": templates_dir / "tutorials.txt",
+        }
+
+    def test_prerequisites_content(
+        self, operational_certification_test, actual_connector_spec: ConnectorSpecification, connector_documentation: str, docs_path: Path
+    ):
+        """
+        Check that the Prerequisites section mentions the title of every required spec field and,
+        when the spec needs credentials, at least one of CREDENTIALS_KEYWORDS.
+        """
+        node = docs_utils.documentation_node(connector_documentation)
+        header_line_map = {docs_utils.header_name(n): n.map[1] for n in node if n.type == self.HEADING}
+        headings = tuple(header_line_map.keys())
+
+        if not header_line_map.get(self.PREREQUISITES):
+            pytest.fail(f"Documentation does not have {self.PREREQUISITES} section.")
+
+        prereq_start_line = header_line_map[self.PREREQUISITES]
+        prereq_end_line = docs_utils.description_end_line_index(self.PREREQUISITES, headings, header_line_map)
+
+        with open(docs_path, "r", encoding="utf-8") as docs_file:
+            prereq_content_lines = docs_file.readlines()[prereq_start_line:prereq_end_line]
+        # adding real character to avoid accidentally joining lines into a wanted title.
+        prereq_content = "|".join(prereq_content_lines).lower()
+        required_titles, has_credentials = docs_utils.required_titles_from_spec(actual_connector_spec.connectionSpecification)
+
+        for title in required_titles:
+            assert title in prereq_content, (
+                f"Required '{title}' field is not in {self.PREREQUISITES} section " f"or title in spec doesn't match name in the docs."
+            )
+
+        if has_credentials:
+            # credentials have a keyword-based check because this step is described in many different ways
+            credentials_validation = [k in prereq_content for k in self.CREDENTIALS_KEYWORDS]
+            assert True in credentials_validation, f"Required 'credentials' field is not in {self.PREREQUISITES} section."
+
+    def test_docs_structure(self, operational_certification_test, connector_documentation: str, connector_metadata: dict):
+        """
+        test_docs_structure gets all headers from the source documentation file and checks that their order is correct.
+        The order of the headers should follow our standard template https://hackmd.io/Bz75cgATSbm7DjrAqgl4rw.
+        _get_template_headings returns the headers of the standard template and the non-required headers that might not be in the docs.
+        The CONNECTOR_SPECIFIC_HEADINGS value in the template marks the place where connector specific headers may appear;
+        those headers are outside of the standard template, depend on the connector and are therefore not validated.
+        """
+
+        heading_names = docs_utils.prepare_headers(connector_documentation)
+        template_headings, non_required_heading = self._get_template_headings(connector_metadata["data"]["name"])
+
+        heading_names_len, template_headings_len = len(heading_names), len(template_headings)
+        heading_names_index, template_headings_index = 0, 0
+
+        while heading_names_index < heading_names_len and template_headings_index < template_headings_len:
+            heading_names_value = heading_names[heading_names_index]
+            template_headings_value = template_headings[template_headings_index]
+            # check that template header is specific for connector and actual header should not be validated
+            if template_headings_value == self.CONNECTOR_SPECIFIC_HEADINGS:
+                # check that actual header is not in required headers, as required headers should be on a right place and order
+                if heading_names_value not in template_headings:
+                    heading_names_index += 1  # go to the next actual header as CONNECTOR_SPECIFIC_HEADINGS can be more than one
+                    continue
+                else:
+                    # if actual header is required go to the next template header to validate actual header order
+                    template_headings_index += 1
+                    continue
+            # strict check that actual header equals template header
+            if heading_names_value == template_headings_value:
+                # found expected header, go to the next header in template and actual headers
+                heading_names_index += 1
+                template_headings_index += 1
+                continue
+            # actual header != template header means that template value is not required and can be skipped
+            if template_headings_value in non_required_heading:
+                # found non-required header, go to the next template header to validate actual header
+                template_headings_index += 1
+                continue
+            # no check matched, so neither index could advance: the actual header is misplaced or unexpected
+            pytest.fail(docs_utils.reason_titles_not_match(heading_names_value, template_headings_value, template_headings))
+        # indexes didn't move to the last required one, so some headers are missed
+        if template_headings_index != template_headings_len:
+            pytest.fail(docs_utils.reason_missing_titles(template_headings_index, template_headings))
+
+    def test_docs_descriptions(
+        self, operational_certification_test, docs_path: Path, connector_documentation: str, connector_metadata: dict
+    ):
+        """
+        Compare the text that follows each templated heading with its template file, line by line.
+        Headings that are absent from the documentation are ignored here; the comparison stops at the
+        shorter of the two line sequences.
+        """
+        connector_name = connector_metadata["data"]["name"]
+        template_descriptions = self._headings_description(connector_name)
+
+        node = docs_utils.documentation_node(connector_documentation)
+        header_line_map = {docs_utils.header_name(n): n.map[1] for n in node if n.type == self.HEADING}
+        actual_headings = tuple(header_line_map.keys())
+
+        # Read the documentation once instead of re-reading it for every templated heading.
+        with open(docs_path, "r", encoding="utf-8") as docs_file:
+            docs_lines = docs_file.readlines()
+
+        for heading, description in template_descriptions.items():
+            if heading not in actual_headings:
+                continue
+
+            description_start_line = header_line_map[heading]
+            description_end_line = docs_utils.description_end_line_index(heading, actual_headings, header_line_map)
+            docs_description_content = docs_lines[description_start_line:description_end_line]
+
+            with open(description, "r", encoding="utf-8") as template_file:
+                template_description_content = template_file.readlines()
+
+            for d, t in zip(docs_description_content, template_description_content):
+                d, t = docs_utils.prepare_lines_to_compare(connector_name, d, t)
+                assert d == t, f"Description for '{heading}' does not follow structure.\nExpected: {t} Actual: {d}"
+
+    def test_validate_links(self, operational_certification_test, connector_documentation: str):
+        """
+        Request every http(s) link found in the documentation concurrently and fail when a link
+        answers with a status code outside of the accepted ones. Links that cannot be requested
+        or that do not answer in time are skipped (best effort).
+        """
+        valid_status_codes = [200, 403, 401, 405]  # we skip 4xx due to needed access
+        request_timeout_seconds = 30
+        links = re.findall(r"(https?://[^\s)]+)", connector_documentation)
+        invalid_links = []
+        threads = []
+
+        def validate_docs_links(docs_link):
+            try:
+                # Without a timeout a stalled server would keep the worker thread alive indefinitely.
+                response = requests.get(docs_link, timeout=request_timeout_seconds)
+            except requests.exceptions.RequestException:
+                # Unreachable or slow links are skipped on purpose instead of crashing the worker thread.
+                return
+            if response.status_code not in valid_status_codes:
+                invalid_links.append(docs_link)
+
+        for link in links:
+            # daemon threads cannot block interpreter shutdown if a request outlives the join below
+            process = Thread(target=validate_docs_links, args=[link], daemon=True)
+            process.start()
+            threads.append(process)
+
+        for process in threads:
+            process.join(timeout=30)  # 30s timeout for process else link will be skipped
+
+        assert not invalid_links, f"{len(invalid_links)} invalid links were found in the connector documentation: {invalid_links}."
diff --git a/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/utils/docs.py b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/utils/docs.py
new file mode 100644
index 000000000000..2b29a65ce5e4
--- /dev/null
+++ b/airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/utils/docs.py
@@ -0,0 +1,133 @@
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+import re
+from difflib import get_close_matches
+from typing import Any
+
+from markdown_it import MarkdownIt
+from markdown_it.tree import SyntaxTreeNode
+
+
+def remove_step_from_heading(heading: str) -> str:
+    """Strip the "Step 1: " or "Step 2: " marker from a heading.
+
+    Only one marker kind is removed per call: when the heading contains "Step 1: " every
+    occurrence of it is removed and "Step 2: " is left alone; otherwise "Step 2: " is removed.
+    Headings without either marker are returned unchanged.
+    """
+    for step_marker in ("Step 1: ", "Step 2: "):
+        if step_marker in heading:
+            return heading.replace(step_marker, "")
+    return heading
+
+
+def required_titles_from_spec(spec: dict[str, Any]) -> tuple[list[str], bool]:
+    """Collect the lower-cased titles of the required spec fields.
+
+    Args:
+        spec: connection specification with optional `required` and `properties` entries.
+            It is not modified.
+
+    Returns:
+        A pair `(titles, has_credentials)`. `has_credentials` is True when `credentials`,
+        `client_id` or `client_secret` is required, or when `client_secret` is a property.
+        In that case the credential fields are left out of `titles`, because they are
+        validated separately by keyword.
+
+    Raises:
+        KeyError: if a required non-credential field has no property entry or no `title`.
+    """
+    credential_fields = ("credentials", "client_id", "client_secret", "access_token", "refresh_token")
+    # Copy so that filtering below never alters the list owned by the caller's spec.
+    required_fields = list(spec.get("required", []))
+    properties = spec.get("properties", {})
+
+    has_credentials = (
+        "credentials" in required_fields
+        or "client_id" in required_fields
+        or "client_secret" in required_fields
+        or "client_secret" in properties
+    )
+    if has_credentials:
+        required_fields = [field for field in required_fields if field not in credential_fields]
+
+    titles = [properties[field]["title"].lower() for field in required_fields]
+    return titles, has_credentials
+
+
+def documentation_node(connector_documentation: str) -> SyntaxTreeNode:
+    """Parse markdown text with the "commonmark" preset and return its syntax tree."""
+    parser = MarkdownIt("commonmark")
+    return SyntaxTreeNode(parser.parse(connector_documentation))
+
+
+def header_name(n: SyntaxTreeNode) -> str:
+    """Return the text of a heading node.
+
+    The text is taken from the first child of the node's second token, so only the first
+    fragment of the heading is returned (presumably a heading with inline formatting is
+    split into several children -- confirm against markdown-it behaviour).
+    """
+    node_tokens = n.to_tokens()
+    content_token = node_tokens[1]
+    leading_fragment = content_token.children[0]
+    return leading_fragment.content
+
+
+def prepare_lines_to_compare(connector_name: str, docs_line: str, template_line: str) -> tuple[str, str]:
+    """Normalise a documentation line and a template line so that they can be compared with `==`.
+
+    Args:
+        connector_name: value substituted for the `{connector_name}` placeholder of the template.
+        docs_line: line taken from the connector documentation.
+        template_line: line taken from a description template.
+
+    Returns:
+        `(docs_line, template_line)` where the template has the connector name filled in and,
+        if the template expects a link, the target of the first markdown link of the docs line
+        is replaced by the literal `({docs_link})` placeholder. A docs line without a markdown
+        link is returned unchanged.
+    """
+    connector_name_to_replace = "{connector_name}"
+    link_to_replace = "({docs_link})"
+
+    template_line = template_line.replace(connector_name_to_replace, connector_name)
+    if link_to_replace in template_line:
+        # Anchor on the "](" of a markdown link: plain parentheses in the text, e.g. a connector
+        # name such as "Google Analytics 4 (GA4)", must not be mistaken for the link target.
+        docs_line = re.sub(r"(?<=\])\([^)]*\)", link_to_replace, docs_line, count=1)
+
+    return docs_line, template_line
+
+
+def remove_not_required_step_headers(headers: list[str]) -> list[str]:
+    """
+    Removes headers like Step 1.1 Step 3 Step 2.3 from actual headers, if they are placed after the Step 1: header.
+    from: "Connector name", "Prerequisites", "Setup guide", "Step 1: do something 1", "Step 1.11: do something 11",
+    "Step 2: do something 2", "Step 2.1: do something 2.1", "Changelog"
+    To: "Connector name", "Prerequisites", "Setup guide", "Step 1: do something 1", "Step 2: do something 2", "Changelog"
+    These are connector specific headers, so we can ignore them.
+
+    Any sequence of headers is accepted and the input is never modified; a new list is returned.
+    Headers are returned as they are when no header contains "Step 1: ".
+    """
+    headers = list(headers)
+    # `is None` matters: a "Step 1: " header at position 0 is a valid anchor, not "missing".
+    step_one_index = next((index for index, header in enumerate(headers) if "Step 1: " in header), None)
+    if step_one_index is None:  # docs doesn't have Step 1 headers
+        return headers
+
+    step_headers = headers[step_one_index:]
+    # Same pattern as before, now a raw string; "." is intentionally left matching any character.
+    step_pattern = re.compile(r"Step \d+.?\d*: ")
+
+    # `current` always points at the last required step header (Step 1, later Step 2).
+    current = 0
+    while current + 1 < len(step_headers) and step_pattern.match(step_headers[current + 1]):
+        if "Step 2: " in step_headers[current + 1]:
+            current += 1  # found Step 2, it's a required header, keep it and continue after it
+        else:
+            del step_headers[current + 1]  # drop any other step header, by position rather than by value
+
+    return headers[:step_one_index] + step_headers
+
+
+def reason_titles_not_match(heading_names_value: str, template_headings_value: str, template_headings: list[str]) -> str:
+    """Build the failure message for a heading that does not match the template.
+
+    When `difflib.get_close_matches` finds a template heading that resembles the actual one
+    (and the best match is not the actual heading itself), the candidates are included in the
+    message as a hint.
+    """
+    close_titles = get_close_matches(heading_names_value, template_headings)
+    has_suggestion = bool(close_titles) and close_titles[0] != heading_names_value
+
+    if has_suggestion:
+        diff = f"Diff:\nActual Heading: '{heading_names_value}'. Possible correct heading: '{close_titles}'. Expected Heading: '{template_headings_value}'."
+    else:
+        diff = f"Diff:\nActual Heading: '{heading_names_value}'. Expected Heading: '{template_headings_value}'"
+
+    reason = (
+        f"Documentation structure doesn't follow standard template. Heading '{heading_names_value}' is not in the right place, "
+        f"the name of heading is incorrect or the heading name is not expected.\n"
+    )
+    return reason + diff
+
+
+def reason_missing_titles(template_headings_index: int, template_headings: list[str]) -> str:
+    """Build the failure message listing the template headings from the given index onwards."""
+    missing_headers = template_headings[template_headings_index:]
+    summary = f"Documentation structure doesn't follow standard template. docs is not full."
+    return summary + f"\nMissing headers: {missing_headers}"
+
+
+def description_end_line_index(heading: str, actual_headings: list[str], header_line_map: dict[str, int]) -> int:
+    """Return the line index at which the description of `heading` ends.
+
+    That is the mapped line of the heading that follows the first occurrence of `heading`
+    in `actual_headings`, or -1 when `heading` is the last one.
+    NOTE(review): used as a slice end, -1 excludes the final line of the document.
+
+    Raises:
+        ValueError: if `heading` is not in `actual_headings`.
+    """
+    next_position = actual_headings.index(heading) + 1
+    if next_position == len(actual_headings):
+        return -1
+    following_heading = actual_headings[next_position]
+    return header_line_map[following_heading]
+
+
+def prepare_headers(connector_documentation: str) -> tuple[str, ...]:
+    """Extract the headings of a documentation page in the form used for template comparison.
+
+    Args:
+        connector_documentation: markdown text of the documentation page.
+
+    Returns:
+        The headings in document order, as a tuple, after dropping the non-required step
+        headings and removing the "Step 1: " / "Step 2: " markers from the remaining ones.
+    """
+    node = documentation_node(connector_documentation)
+    headers = [header_name(n) for n in node if n.type == "heading"]  # find all headers
+    headers = remove_not_required_step_headers(headers)  # remove Step 1.1 Step 3 ... headers
+    return tuple(remove_step_from_heading(h) for h in headers)  # remove Step 1 and Step 2 from header name
diff --git a/airbyte-integrations/bases/connector-acceptance-test/pyproject.toml b/airbyte-integrations/bases/connector-acceptance-test/pyproject.toml
index e051c1949bc6..d090aaa6fe8c 100644
--- a/airbyte-integrations/bases/connector-acceptance-test/pyproject.toml
+++ b/airbyte-integrations/bases/connector-acceptance-test/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "connector-acceptance-test"
-version = "3.3.3"
+version = "3.4.0"
description = "Contains acceptance tests for connectors."
authors = ["Airbyte "]
license = "MIT"
diff --git a/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/correct.md b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/correct.md
new file mode 100644
index 000000000000..6fdf34bba108
--- /dev/null
+++ b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/correct.md
@@ -0,0 +1,305 @@
+# GitHub
+
+
+
+This page contains the setup guide and reference information for the [GitHub](https://www.github.com) source connector.
+
+
+
+## Prerequisites
+
+- List of GitHub Repositories (and access for them in case they are private)
+
+
+**For Airbyte Cloud:**
+
+- OAuth
+- Personal Access Token (see [Permissions and scopes](https://docs.airbyte.com/integrations/sources/github#permissions-and-scopes))
+
+
+
+**For Airbyte Open Source:**
+
+- Personal Access Token (see [Permissions and scopes](https://docs.airbyte.com/integrations/sources/github#permissions-and-scopes))
+
+
+## Setup guide
+
+### Step 1: Set up GitHub
+
+Create a [GitHub Account](https://github.com).
+
+
+**Airbyte Open Source additional setup steps**
+
+Log into [GitHub](https://github.com) and then generate a [personal access token](https://github.com/settings/tokens). To load balance your API quota consumption across multiple API tokens, input multiple tokens separated with `,`.
+
+
+### Step 2: Set up the GitHub connector in Airbyte
+
+
+**For Airbyte Cloud:**
+
+1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account.
+2. In the left navigation bar, click **Sources**.
+3. On the source selection page, select **GitHub** from the list of Sources.
+4. Add a name for your GitHub connector.
+5. To authenticate:
+
+
+ - **For Airbyte Cloud:** **Authenticate your GitHub account** to authorize your GitHub account. Airbyte will authenticate the GitHub account you are already logged in to. Please make sure you are logged into the right account.
+
+
+
+ - **For Airbyte Open Source:** Authenticate with **Personal Access Token**. To generate a personal access token, log into [GitHub](https://github.com) and then generate a [personal access token](https://github.com/settings/tokens). Enter your GitHub personal access token. To load balance your API quota consumption across multiple API tokens, input multiple tokens separated with `,`.
+
+
+6. **GitHub Repositories** - Enter a list of GitHub organizations/repositories, e.g. `airbytehq/airbyte` for single repository, `airbytehq/airbyte airbytehq/another-repo` for multiple repositories. If you want to specify the organization to receive data from all its repositories, then you should specify it according to the following example: `airbytehq/*`.
+
+:::caution
+Repositories with the wrong name or repositories that do not exist or have the wrong name format will be skipped with `WARN` message in the logs.
+:::
+
+7. **Start date (Optional)** - The date from which you'd like to replicate data for streams. For streams which support this configuration, only data generated on or after the start date will be replicated.
+
+- These streams will only sync records generated on or after the **Start Date**: `comments`, `commit_comment_reactions`, `commit_comments`, `commits`, `deployments`, `events`, `issue_comment_reactions`, `issue_events`, `issue_milestones`, `issue_reactions`, `issues`, `project_cards`, `project_columns`, `projects`, `pull_request_comment_reactions`, `pull_requests`, `pull_requeststats`, `releases`, `review_comments`, `reviews`, `stargazers`, `workflow_runs`, `workflows`.
+
+- The **Start Date** does not apply to the streams below and all data will be synced for these streams: `assignees`, `branches`, `collaborators`, `issue_labels`, `organizations`, `pull_request_commits`, `pull_request_stats`, `repositories`, `tags`, `teams`, `users`
+
+8. **Branch (Optional)** - List of GitHub repository branches to pull commits from, e.g. `airbytehq/airbyte/master`. If no branches are specified for a repository, the default branch will be pulled. (e.g. `airbytehq/airbyte/master airbytehq/airbyte/my-branch`).
+9. **Max requests per hour (Optional)** - The GitHub API allows for a maximum of 5,000 requests per hour (15,000 for Github Enterprise). You can specify a lower value to limit your use of the API quota. Refer to GitHub article [Rate limits for the REST API](https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api).
+
+
+
+## Supported sync modes
+
+The GitHub source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts/#connection-sync-modes):
+
+- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
+- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
+- [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
+- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped)
+
+## Supported Streams
+
+This connector outputs the following full refresh streams:
+
+- [Assignees](https://docs.github.com/en/rest/issues/assignees?apiVersion=2022-11-28#list-assignees)
+- [Branches](https://docs.github.com/en/rest/branches/branches?apiVersion=2022-11-28#list-branches)
+- [Contributor Activity](https://docs.github.com/en/rest/metrics/statistics?apiVersion=2022-11-28#get-all-contributor-commit-activity)
+- [Collaborators](https://docs.github.com/en/rest/collaborators/collaborators?apiVersion=2022-11-28#list-repository-collaborators)
+- [Issue labels](https://docs.github.com/en/rest/issues/labels?apiVersion=2022-11-28#list-labels-for-a-repository)
+- [Organizations](https://docs.github.com/en/rest/orgs/orgs?apiVersion=2022-11-28#list-organizations)
+- [Pull request commits](https://docs.github.com/en/rest/pulls/pulls?apiVersion=2022-11-28#list-commits-on-a-pull-request)
+- [Tags](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-repository-tags)
+- [TeamMembers](https://docs.github.com/en/rest/teams/members?apiVersion=2022-11-28#list-team-members)
+- [TeamMemberships](https://docs.github.com/en/rest/teams/members?apiVersion=2022-11-28#get-team-membership-for-a-user)
+- [Teams](https://docs.github.com/en/rest/teams/teams?apiVersion=2022-11-28#list-teams)
+- [Users](https://docs.github.com/en/rest/orgs/members?apiVersion=2022-11-28#list-organization-members)
+- [Issue timeline events](https://docs.github.com/en/rest/issues/timeline?apiVersion=2022-11-28#list-timeline-events-for-an-issue)
+
+This connector outputs the following incremental streams:
+
+- [Comments](https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#list-issue-comments-for-a-repository)
+- [Commit comment reactions](https://docs.github.com/en/rest/reference/reactions?apiVersion=2022-11-28#list-reactions-for-a-commit-comment)
+- [Commit comments](https://docs.github.com/en/rest/commits/comments?apiVersion=2022-11-28#list-commit-comments-for-a-repository)
+- [Commits](https://docs.github.com/en/rest/commits/commits?apiVersion=2022-11-28#list-commits)
+- [Deployments](https://docs.github.com/en/rest/deployments/deployments?apiVersion=2022-11-28#list-deployments)
+- [Events](https://docs.github.com/en/rest/activity/events?apiVersion=2022-11-28#list-repository-events)
+- [Issue comment reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-an-issue-comment)
+- [Issue events](https://docs.github.com/en/rest/issues/events?apiVersion=2022-11-28#list-issue-events-for-a-repository)
+- [Issue milestones](https://docs.github.com/en/rest/issues/milestones?apiVersion=2022-11-28#list-milestones)
+- [Issue reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-an-issue)
+- [Issues](https://docs.github.com/en/rest/issues/issues?apiVersion=2022-11-28#list-repository-issues)
+- [Project (Classic) cards](https://docs.github.com/en/rest/projects/cards?apiVersion=2022-11-28#list-project-cards)
+- [Project (Classic) columns](https://docs.github.com/en/rest/projects/columns?apiVersion=2022-11-28#list-project-columns)
+- [Projects (Classic)](https://docs.github.com/en/rest/projects/projects?apiVersion=2022-11-28#list-repository-projects)
+- [ProjectsV2](https://docs.github.com/en/graphql/reference/objects#projectv2)
+- [Pull request comment reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-a-pull-request-review-comment)
+- [Pull request stats](https://docs.github.com/en/graphql/reference/objects#pullrequest)
+- [Pull requests](https://docs.github.com/en/rest/pulls/pulls?apiVersion=2022-11-28#list-pull-requests)
+- [Releases](https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#list-releases)
+- [Repositories](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-organization-repositories)
+- [Review comments](https://docs.github.com/en/rest/pulls/comments?apiVersion=2022-11-28#list-review-comments-in-a-repository)
+- [Reviews](https://docs.github.com/en/rest/pulls/reviews?apiVersion=2022-11-28#list-reviews-for-a-pull-request)
+- [Stargazers](https://docs.github.com/en/rest/activity/starring?apiVersion=2022-11-28#list-stargazers)
+- [WorkflowJobs](https://docs.github.com/en/rest/actions/workflow-jobs?apiVersion=2022-11-28#list-jobs-for-a-workflow-run)
+- [WorkflowRuns](https://docs.github.com/en/rest/actions/workflow-runs?apiVersion=2022-11-28#list-workflow-runs-for-a-repository)
+- [Workflows](https://docs.github.com/en/rest/actions/workflows?apiVersion=2022-11-28#list-repository-workflows)
+
+### Notes
+
+1. Only 4 of the streams listed above \(`comments`, `commits`, `issues` and `review comments`\) are pure incremental, meaning that they:
+
+ - read only new records;
+ - output only new records.
+
+2. Streams `workflow_runs` and `workflow_jobs` are almost pure incremental:
+
+ - read new records and some portion of old records (in past 30 days) [docs](https://docs.github.com/en/actions/managing-workflow-runs/re-running-workflows-and-jobs);
+ - the `workflow_jobs` depends on the `workflow_runs` to read the data, so they both follow the same logic [docs](https://docs.github.com/en/rest/actions/workflow-jobs#list-jobs-for-a-workflow-run);
+ - output only new records.
+
+3. Other 19 incremental streams are also incremental but with one difference, they:
+
+ - read all records;
+ - output only new records.
+ Please consider this behaviour when using those 19 incremental streams because it may affect your API call limits.
+
+4. Sometimes, for large streams, specifying a very distant `start_date` in the past may result in repeated errors from GitHub instead of records \(a corresponding `WARN` log message will be output\). In this case, specifying a more recent `start_date` may help.
+ **The "Start date" configuration option does not apply to the streams below, because the GitHub API does not include dates which can be used for filtering:**
+
+- `assignees`
+- `branches`
+- `collaborators`
+- `issue_labels`
+- `organizations`
+- `pull_request_commits`
+- `pull_request_stats`
+- `repositories`
+- `tags`
+- `teams`
+- `users`
+
+## Limitations & Troubleshooting
+
+
+
+Expand to see details about GitHub connector limitations and troubleshooting.
+
+
+### Connector limitations
+
+#### Rate limiting
+The GitHub connector should not run into GitHub API limitations under normal usage. Please [create an issue](https://github.com/airbytehq/airbyte/issues) if you see any rate limit issues that are not automatically retried successfully. Refer to GitHub article [Rate limits for the REST API](https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api).
+
+#### Permissions and scopes
+
+If you use the OAuth authentication method, the OAuth2.0 application requests the following list of [scopes](https://docs.github.com/en/developers/apps/building-oauth-apps/scopes-for-oauth-apps#available-scopes): **repo**, **read:org**, **read:repo_hook**, **read:user**, **read:discussion**, **workflow**. For a [personal access token](https://github.com/settings/tokens) you need to manually select the needed scopes.
+
+Your token should have at least the `repo` scope. Depending on which streams you want to sync, the user generating the token needs more permissions:
+
+- For syncing Collaborators, the user which generates the personal access token must be a collaborator. To become a collaborator, they must be invited by an owner. If there are no collaborators, no records will be synced. Read more about access permissions [here](https://docs.github.com/en/get-started/learning-about-github/access-permissions-on-github).
+- Syncing [Teams](https://docs.github.com/en/organizations/organizing-members-into-teams/about-teams) is only available to authenticated members of a team's [organization](https://docs.github.com/en/rest/orgs). [Personal user accounts](https://docs.github.com/en/get-started/learning-about-github/types-of-github-accounts) and repositories belonging to them don't have access to Teams features. In this case no records will be synced.
+- To sync the Projects stream, the repository must have the Projects feature enabled.
+
+### Troubleshooting
+
+* Check out common troubleshooting issues for the GitHub source connector on our [Airbyte Forum](https://github.com/airbytehq/airbyte/discussions)
+
+
+
+## Changelog
+
+| Version | Date | Pull Request | Subject |
+|:--------|:-----------|:------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| 1.5.5 | 2023-12-26 | [33783](https://github.com/airbytehq/airbyte/pull/33783) | Fix retry for 504 error in GraphQL based streams |
+| 1.5.4 | 2023-11-20 | [32679](https://github.com/airbytehq/airbyte/pull/32679) | Return AirbyteMessage if max retry exceeded for 202 status code |
+| 1.5.3 | 2023-10-23 | [31702](https://github.com/airbytehq/airbyte/pull/31702) | Base image migration: remove Dockerfile and use the python-connector-base image |
+| 1.5.2 | 2023-10-13 | [31386](https://github.com/airbytehq/airbyte/pull/31386) | Handle `ContributorActivity` continuous `ACCEPTED` response |
+| 1.5.1 | 2023-10-12 | [31307](https://github.com/airbytehq/airbyte/pull/31307) | Increase backoff_time for stream `ContributorActivity` |
+| 1.5.0 | 2023-10-11 | [31300](https://github.com/airbytehq/airbyte/pull/31300) | Update Schemas: Add date-time format to fields |
+| 1.4.6 | 2023-10-04 | [31056](https://github.com/airbytehq/airbyte/pull/31056) | Migrate spec properties' `repository` and `branch` type to \<array\> |
+| 1.4.5 | 2023-10-02 | [31023](https://github.com/airbytehq/airbyte/pull/31023) | Increase backoff for stream `Contributor Activity` |
+| 1.4.4 | 2023-10-02 | [30971](https://github.com/airbytehq/airbyte/pull/30971) | Mark `start_date` as optional. |
+| 1.4.3 | 2023-10-02 | [30979](https://github.com/airbytehq/airbyte/pull/30979) | Fetch archived records in `Project Cards` |
+| 1.4.2 | 2023-09-30 | [30927](https://github.com/airbytehq/airbyte/pull/30927) | Provide actionable user error messages |
+| 1.4.1 | 2023-09-30 | [30839](https://github.com/airbytehq/airbyte/pull/30839) | Update CDK to Latest version |
+| 1.4.0 | 2023-09-29 | [30823](https://github.com/airbytehq/airbyte/pull/30823) | Add new stream `issue Timeline Events` |
+| 1.3.1 | 2023-09-28 | [30824](https://github.com/airbytehq/airbyte/pull/30824) | Handle empty response in stream `ContributorActivity` |
+| 1.3.0 | 2023-09-25 | [30731](https://github.com/airbytehq/airbyte/pull/30731) | Add new stream `ProjectsV2` |
+| 1.2.1 | 2023-09-22 | [30693](https://github.com/airbytehq/airbyte/pull/30693) | Handle 404 error in `TeamMemberShips` |
+| 1.2.0 | 2023-09-22 | [30647](https://github.com/airbytehq/airbyte/pull/30647) | Add support for self-hosted GitHub instances |
+| 1.1.1 | 2023-09-21 | [30654](https://github.com/airbytehq/airbyte/pull/30654) | Rewrite source connection error messages |
+| 1.1.0 | 2023-08-03 | [30615](https://github.com/airbytehq/airbyte/pull/30615) | Add new stream `Contributor Activity` |
+| 1.0.4 | 2023-08-03 | [29031](https://github.com/airbytehq/airbyte/pull/29031) | Reverted `advancedAuth` spec changes |
+| 1.0.3 | 2023-08-01 | [28910](https://github.com/airbytehq/airbyte/pull/28910) | Updated `advancedAuth` broken references |
+| 1.0.2 | 2023-07-11 | [28144](https://github.com/airbytehq/airbyte/pull/28144) | Add `archived_at` property to `Organizations` schema parameter |
+| 1.0.1 | 2023-05-22 | [25838](https://github.com/airbytehq/airbyte/pull/25838) | Deprecate "page size" input parameter |
+| 1.0.0 | 2023-05-19 | [25778](https://github.com/airbytehq/airbyte/pull/25778) | Improve repo(s) name validation on UI |
+| 0.5.0 | 2023-05-16 | [25793](https://github.com/airbytehq/airbyte/pull/25793) | Implement client-side throttling of requests |
+| 0.4.11 | 2023-05-12 | [26025](https://github.com/airbytehq/airbyte/pull/26025) | Added more transparent depiction of the personal access token expired |
+| 0.4.10 | 2023-05-15 | [26075](https://github.com/airbytehq/airbyte/pull/26075) | Add more specific error message description for no repos case. |
+| 0.4.9 | 2023-05-01 | [24523](https://github.com/airbytehq/airbyte/pull/24523) | Add undeclared columns to spec |
+| 0.4.8 | 2023-04-19 | [25312](https://github.com/airbytehq/airbyte/pull/25312) | Fix repo name validation |
+| 0.4.7 | 2023-03-24 | [24457](https://github.com/airbytehq/airbyte/pull/24457) | Add validation and transformation for repositories config |
+| 0.4.6 | 2023-03-24 | [24398](https://github.com/airbytehq/airbyte/pull/24398) | Fix caching for `get_starting_point` in stream "Commits" |
+| 0.4.5 | 2023-03-23 | [24417](https://github.com/airbytehq/airbyte/pull/24417) | Add pattern_descriptors to fields with an expected format |
+| 0.4.4 | 2023-03-17 | [24255](https://github.com/airbytehq/airbyte/pull/24255) | Add field groups and titles to improve display of connector setup form |
+| 0.4.3 | 2023-03-04 | [22993](https://github.com/airbytehq/airbyte/pull/22993) | Specified date formatting in specification |
+| 0.4.2 | 2023-03-03 | [23467](https://github.com/airbytehq/airbyte/pull/23467) | added user friendly messages, added AirbyteTracedException config_error, updated SAT |
+| 0.4.1 | 2023-01-27 | [22039](https://github.com/airbytehq/airbyte/pull/22039) | Set `AvailabilityStrategy` for streams explicitly to `None` |
+| 0.4.0 | 2023-01-20 | [21457](https://github.com/airbytehq/airbyte/pull/21457) | Use GraphQL for `issue_reactions` stream |
+| 0.3.12 | 2023-01-18 | [21481](https://github.com/airbytehq/airbyte/pull/21481) | Handle 502 Bad Gateway error with proper log message |
+| 0.3.11 | 2023-01-06 | [21084](https://github.com/airbytehq/airbyte/pull/21084) | Raise Error if no organizations or repos are available during read |
+| 0.3.10 | 2022-12-15 | [20523](https://github.com/airbytehq/airbyte/pull/20523) | Revert changes from 0.3.9 |
+| 0.3.9 | 2022-12-14 | [19978](https://github.com/airbytehq/airbyte/pull/19978) | Update CDK dependency; move custom HTTPError handling into `AvailabilityStrategy` classes |
+| 0.3.8 | 2022-11-10 | [19299](https://github.com/airbytehq/airbyte/pull/19299) | Fix events and workflow_runs datetimes |
+| 0.3.7 | 2022-10-20 | [18213](https://github.com/airbytehq/airbyte/pull/18213) | Skip retry on HTTP 200 |
+| 0.3.6 | 2022-10-11 | [17852](https://github.com/airbytehq/airbyte/pull/17852) | Use default behaviour, retry on 429 and all 5XX errors |
+| 0.3.5 | 2022-10-07 | [17715](https://github.com/airbytehq/airbyte/pull/17715) | Improve 502 handling for `comments` stream |
+| 0.3.4 | 2022-10-04 | [17555](https://github.com/airbytehq/airbyte/pull/17555) | Skip repository if got HTTP 500 for WorkflowRuns stream |
+| 0.3.3 | 2022-09-28 | [17287](https://github.com/airbytehq/airbyte/pull/17287) | Fix problem with "null" `cursor_field` for WorkflowJobs stream |
+| 0.3.2 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state. |
+| 0.3.1 | 2022-09-21 | [16947](https://github.com/airbytehq/airbyte/pull/16947) | Improve error logging when handling HTTP 500 error |
+| 0.3.0 | 2022-09-09 | [16534](https://github.com/airbytehq/airbyte/pull/16534) | Add new stream `WorkflowJobs` |
+| 0.2.46 | 2022-08-17 | [15730](https://github.com/airbytehq/airbyte/pull/15730) | Validate input organizations and repositories |
+| 0.2.45 | 2022-08-11 | [15420](https://github.com/airbytehq/airbyte/pull/15420) | "User" object can be "null" |
+| 0.2.44 | 2022-08-01 | [14795](https://github.com/airbytehq/airbyte/pull/14795) | Use GraphQL for `pull_request_comment_reactions` stream |
+| 0.2.43 | 2022-07-26 | [15049](https://github.com/airbytehq/airbyte/pull/15049) | Bugfix schemas for streams `deployments`, `workflow_runs`, `teams` |
+| 0.2.42 | 2022-07-12 | [14613](https://github.com/airbytehq/airbyte/pull/14613) | Improve schema for stream `pull_request_commits` added "null" |
+| 0.2.41 | 2022-07-03 | [14376](https://github.com/airbytehq/airbyte/pull/14376) | Add Retry for GraphQL API Resource limitations |
+| 0.2.40 | 2022-07-01 | [14338](https://github.com/airbytehq/airbyte/pull/14338) | Revert: "Rename field `mergeable` to `is_mergeable`" |
+| 0.2.39 | 2022-06-30 | [14274](https://github.com/airbytehq/airbyte/pull/14274) | Rename field `mergeable` to `is_mergeable` |
+| 0.2.38 | 2022-06-27 | [13989](https://github.com/airbytehq/airbyte/pull/13989) | Use GraphQL for `reviews` stream |
+| 0.2.37 | 2022-06-21 | [13955](https://github.com/airbytehq/airbyte/pull/13955) | Fix "secondary rate limit" not retrying |
+| 0.2.36 | 2022-06-20 | [13926](https://github.com/airbytehq/airbyte/pull/13926) | Break point added for `workflows_runs` stream |
+| 0.2.35 | 2022-06-16 | [13763](https://github.com/airbytehq/airbyte/pull/13763) | Use GraphQL for `pull_request_stats` stream |
+| 0.2.34 | 2022-06-14 | [13707](https://github.com/airbytehq/airbyte/pull/13707) | Fix API sorting, fix `get_starting_point` caching |
+| 0.2.33 | 2022-06-08 | [13558](https://github.com/airbytehq/airbyte/pull/13558) | Enable caching only for parent streams |
+| 0.2.32 | 2022-06-07 | [13531](https://github.com/airbytehq/airbyte/pull/13531) | Fix different result from `get_starting_point` when reading by pages |
+| 0.2.31 | 2022-05-24 | [13115](https://github.com/airbytehq/airbyte/pull/13115) | Add incremental support for streams `WorkflowRuns` |
+| 0.2.30 | 2022-05-09 | [12294](https://github.com/airbytehq/airbyte/pull/12294) | Add incremental support for streams `CommitCommentReactions`, `IssueCommentReactions`, `IssueReactions`, `PullRequestCommentReactions`, `Repositories`, `Workflows` |
+| 0.2.29 | 2022-05-04 | [12482](https://github.com/airbytehq/airbyte/pull/12482) | Update input configuration copy |
+| 0.2.28 | 2022-04-21 | [11893](https://github.com/airbytehq/airbyte/pull/11893) | Add new streams `TeamMembers`, `TeamMemberships` |
+| 0.2.27 | 2022-04-02 | [11678](https://github.com/airbytehq/airbyte/pull/11678) | Fix "PAT Credentials" in spec |
+| 0.2.26 | 2022-03-31 | [11623](https://github.com/airbytehq/airbyte/pull/11623) | Re-factored incremental sync for `Reviews` stream |
+| 0.2.25 | 2022-03-31 | [11567](https://github.com/airbytehq/airbyte/pull/11567) | Improve code for better error handling |
+| 0.2.24 | 2022-03-30 | [9251](https://github.com/airbytehq/airbyte/pull/9251) | Add Streams Workflow and WorkflowRuns |
+| 0.2.23 | 2022-03-17 | [11212](https://github.com/airbytehq/airbyte/pull/11212) | Improve documentation and spec for Beta |
+| 0.2.22 | 2022-03-10 | [10878](https://github.com/airbytehq/airbyte/pull/10878) | Fix error handling for unavailable streams with 404 status code |
+| 0.2.21 | 2022-03-04 | [10749](https://github.com/airbytehq/airbyte/pull/10749) | Add new stream `ProjectCards` |
+| 0.2.20 | 2022-02-16 | [10385](https://github.com/airbytehq/airbyte/pull/10385) | Add new stream `Deployments`, `ProjectColumns`, `PullRequestCommits` |
+| 0.2.19 | 2022-02-07 | [10211](https://github.com/airbytehq/airbyte/pull/10211) | Add human-readable error in case of incorrect organization or repo name |
+| 0.2.18 | 2021-02-09 | [10193](https://github.com/airbytehq/airbyte/pull/10193) | Add handling secondary rate limits |
+| 0.2.17 | 2021-02-02 | [9999](https://github.com/airbytehq/airbyte/pull/9999) | Remove BAD_GATEWAY code from backoff_time |
+| 0.2.16 | 2021-02-02 | [9868](https://github.com/airbytehq/airbyte/pull/9868) | Add log message for streams that are restricted for OAuth. Update oauth scopes. |
+| 0.2.15 | 2021-01-26 | [9802](https://github.com/airbytehq/airbyte/pull/9802) | Add missing fields for auto_merge in pull request stream |
+| 0.2.14 | 2021-01-21 | [9664](https://github.com/airbytehq/airbyte/pull/9664) | Add custom pagination size for large streams |
+| 0.2.13 | 2021-01-20 | [9619](https://github.com/airbytehq/airbyte/pull/9619) | Fix logging for function `should_retry` |
+| 0.2.11 | 2021-01-17 | [9492](https://github.com/airbytehq/airbyte/pull/9492) | Remove optional parameter `Accept` for reactions streams to fix error with 502 HTTP status code in response |
+| 0.2.10 | 2021-01-03 | [7250](https://github.com/airbytehq/airbyte/pull/7250) | Use CDK caching and convert PR-related streams to incremental |
+| 0.2.9 | 2021-12-29 | [9179](https://github.com/airbytehq/airbyte/pull/9179) | Use default retry delays on server error responses |
+| 0.2.8 | 2021-12-07 | [8524](https://github.com/airbytehq/airbyte/pull/8524) | Update connector fields title/description |
+| 0.2.7 | 2021-12-06 | [8518](https://github.com/airbytehq/airbyte/pull/8518) | Add connection retry with GitHub |
+| 0.2.6 | 2021-11-24 | [8030](https://github.com/airbytehq/airbyte/pull/8030) | Support start date property for PullRequestStats and Reviews streams |
+| 0.2.5 | 2021-11-21 | [8170](https://github.com/airbytehq/airbyte/pull/8170) | Fix slow check connection for organizations with a lot of repos |
+| 0.2.4 | 2021-11-11 | [7856](https://github.com/airbytehq/airbyte/pull/7856) | Resolve $ref fields in some stream schemas |
+| 0.2.3 | 2021-10-06 | [6833](https://github.com/airbytehq/airbyte/pull/6833) | Fix config backward compatibility |
+| 0.2.2 | 2021-10-05 | [6761](https://github.com/airbytehq/airbyte/pull/6761) | Add OAuth workflow specification |
+| 0.2.1 | 2021-09-22 | [6223](https://github.com/airbytehq/airbyte/pull/6223) | Add option to pull commits from user-specified branches |
+| 0.2.0 | 2021-09-19 | [5898](https://github.com/airbytehq/airbyte/pull/5898) and [6227](https://github.com/airbytehq/airbyte/pull/6227) | Don't minimize any output fields & add better error handling |
+| 0.1.11 | 2021-09-15 | [5949](https://github.com/airbytehq/airbyte/pull/5949) | Add caching for all streams |
+| 0.1.10 | 2021-09-09 | [5860](https://github.com/airbytehq/airbyte/pull/5860) | Add reaction streams |
+| 0.1.9 | 2021-09-02 | [5788](https://github.com/airbytehq/airbyte/pull/5788) | Handling empty repository, check method using RepositoryStats stream |
+| 0.1.8 | 2021-09-01 | [5757](https://github.com/airbytehq/airbyte/pull/5757) | Add more streams |
+| 0.1.7 | 2021-08-27 | [5696](https://github.com/airbytehq/airbyte/pull/5696) | Handle negative backoff values |
+| 0.1.6 | 2021-08-18 | [5223](https://github.com/airbytehq/airbyte/pull/5223) | Add MultipleTokenAuthenticator |
+| 0.1.5 | 2021-08-18 | [5456](https://github.com/airbytehq/airbyte/pull/5456) | Fix set up validation |
+| 0.1.4 | 2021-08-13 | [5136](https://github.com/airbytehq/airbyte/pull/5136) | Support syncing multiple repositories/organizations |
+| 0.1.3 | 2021-08-03 | [5156](https://github.com/airbytehq/airbyte/pull/5156) | Extended existing schemas with `users` property for certain streams |
+| 0.1.2 | 2021-07-13 | [4708](https://github.com/airbytehq/airbyte/pull/4708) | Fix bug with IssueEvents stream and add handling for rate limiting |
+| 0.1.1 | 2021-07-07 | [4590](https://github.com/airbytehq/airbyte/pull/4590) | Fix schema in the `pull_request` stream |
+| 0.1.0 | 2021-07-06 | [4174](https://github.com/airbytehq/airbyte/pull/4174) | New Source: GitHub |
+
+
\ No newline at end of file
diff --git a/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/correct_all_description_exist.md b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/correct_all_description_exist.md
new file mode 100644
index 000000000000..43110297ed9d
--- /dev/null
+++ b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/correct_all_description_exist.md
@@ -0,0 +1,38 @@
+# GitHub
+
+
+
+This page contains the setup guide and reference information for the [GitHub](https://www.github.com) source connector.
+
+
+
+## For Airbyte Cloud:
+
+1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account.
+2. Click Sources and then click + New source/destination.
+3. On the Set up the source page, select GitHub from the Source type dropdown.
+4. Enter a name for the GitHub connector.
+5. Add list of GitHub repositories you want to sync.
+6. Add Start Date from which data will be replicated.
+
+## For Airbyte Open Source:
+
+1. Navigate to the Airbyte Open Source dashboard.
+2. Click Sources and then click + New source/destination.
+3. On the Set up the source page, select GitHub from the Source type dropdown.
+4. Enter a name for the GitHub connector.
+5. Add list of GitHub repositories you want to sync.
+6. Add Start Date from which data will be replicated.
+
+## Supported sync modes
+
+The GitHub source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts/#connection-sync-modes):
+
+- Full Refresh
+- Incremental
+
+## Tutorials
+
+Now that you have set up the GitHub source connector, check out the following GitHub tutorials:
+
+- [Creating PAT](https://docs.github.com/en/enterprise-server@3.9/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens)
diff --git a/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/incorrect_header_order.md b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/incorrect_header_order.md
new file mode 100644
index 000000000000..3feb237735a7
--- /dev/null
+++ b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/incorrect_header_order.md
@@ -0,0 +1,306 @@
+## Prerequisites
+
+- List of GitHub Repositories (and access for them in case they are private)
+-
+# GitHub
+
+
+
+This page contains the setup guide and reference information for the [GitHub](https://www.github.com) source connector.
+
+
+
+
+
+**For Airbyte Cloud:**
+
+- OAuth
+- Personal Access Token (see [Permissions and scopes](https://docs.airbyte.com/integrations/sources/github#permissions-and-scopes))
+
+
+
+**For Airbyte Open Source:**
+
+- Personal Access Token (see [Permissions and scopes](https://docs.airbyte.com/integrations/sources/github#permissions-and-scopes))
+
+
+### Step 1: Set up GitHub
+
+## Setup guide
+
+Create a [GitHub Account](https://github.com).
+
+
+**Airbyte Open Source additional setup steps**
+
+Log into [GitHub](https://github.com) and then generate a [personal access token](https://github.com/settings/tokens). To load balance your API quota consumption across multiple API tokens, input multiple tokens separated with `,`.
+
+
+### Step 2: Set up the GitHub connector in Airbyte
+
+
+**For Airbyte Cloud:**
+
+1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account.
+2. In the left navigation bar, click **Sources**.
+3. On the source selection page, select **GitHub** from the list of Sources.
+4. Add a name for your GitHub connector.
+5. To authenticate:
+
+
+ - **For Airbyte Cloud:** **Authenticate your GitHub account** to authorize your GitHub account. Airbyte will authenticate the GitHub account you are already logged in to. Please make sure you are logged into the right account.
+
+
+
+ - **For Airbyte Open Source:** Authenticate with **Personal Access Token**. To generate a personal access token, log into [GitHub](https://github.com) and then generate a [personal access token](https://github.com/settings/tokens). Enter your GitHub personal access token. To load balance your API quota consumption across multiple API tokens, input multiple tokens separated with `,`.
+
+
+6. **GitHub Repositories** - Enter a list of GitHub organizations/repositories, e.g. `airbytehq/airbyte` for single repository, `airbytehq/airbyte airbytehq/another-repo` for multiple repositories. If you want to specify the organization to receive data from all its repositories, then you should specify it according to the following example: `airbytehq/*`.
+
+:::caution
+Repositories with the wrong name or repositories that do not exist or have the wrong name format will be skipped with `WARN` message in the logs.
+:::
+
+7. **Start date (Optional)** - The date from which you'd like to replicate data for streams. For streams which support this configuration, only data generated on or after the start date will be replicated.
+
+- These streams will only sync records generated on or after the **Start Date**: `comments`, `commit_comment_reactions`, `commit_comments`, `commits`, `deployments`, `events`, `issue_comment_reactions`, `issue_events`, `issue_milestones`, `issue_reactions`, `issues`, `project_cards`, `project_columns`, `projects`, `pull_request_comment_reactions`, `pull_requests`, `pull_requeststats`, `releases`, `review_comments`, `reviews`, `stargazers`, `workflow_runs`, `workflows`.
+
+- The **Start Date** does not apply to the streams below and all data will be synced for these streams: `assignees`, `branches`, `collaborators`, `issue_labels`, `organizations`, `pull_request_commits`, `pull_request_stats`, `repositories`, `tags`, `teams`, `users`
+
+8. **Branch (Optional)** - List of GitHub repository branches to pull commits from, e.g. `airbytehq/airbyte/master`. If no branches are specified for a repository, the default branch will be pulled. (e.g. `airbytehq/airbyte/master airbytehq/airbyte/my-branch`).
+9. **Max requests per hour (Optional)** - The GitHub API allows for a maximum of 5,000 requests per hour (15,000 for GitHub Enterprise). You can specify a lower value to limit your use of the API quota. Refer to GitHub article [Rate limits for the REST API](https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api).
+
+
+
+## Supported sync modes
+
+The GitHub source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts/#connection-sync-modes):
+
+- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
+- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
+- [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
+- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped)
+
+## Supported Streams
+
+This connector outputs the following full refresh streams:
+
+- [Assignees](https://docs.github.com/en/rest/issues/assignees?apiVersion=2022-11-28#list-assignees)
+- [Branches](https://docs.github.com/en/rest/branches/branches?apiVersion=2022-11-28#list-branches)
+- [Contributor Activity](https://docs.github.com/en/rest/metrics/statistics?apiVersion=2022-11-28#get-all-contributor-commit-activity)
+- [Collaborators](https://docs.github.com/en/rest/collaborators/collaborators?apiVersion=2022-11-28#list-repository-collaborators)
+- [Issue labels](https://docs.github.com/en/rest/issues/labels?apiVersion=2022-11-28#list-labels-for-a-repository)
+- [Organizations](https://docs.github.com/en/rest/orgs/orgs?apiVersion=2022-11-28#list-organizations)
+- [Pull request commits](https://docs.github.com/en/rest/pulls/pulls?apiVersion=2022-11-28#list-commits-on-a-pull-request)
+- [Tags](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-repository-tags)
+- [TeamMembers](https://docs.github.com/en/rest/teams/members?apiVersion=2022-11-28#list-team-members)
+- [TeamMemberships](https://docs.github.com/en/rest/teams/members?apiVersion=2022-11-28#get-team-membership-for-a-user)
+- [Teams](https://docs.github.com/en/rest/teams/teams?apiVersion=2022-11-28#list-teams)
+- [Users](https://docs.github.com/en/rest/orgs/members?apiVersion=2022-11-28#list-organization-members)
+- [Issue timeline events](https://docs.github.com/en/rest/issues/timeline?apiVersion=2022-11-28#list-timeline-events-for-an-issue)
+
+This connector outputs the following incremental streams:
+
+- [Comments](https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#list-issue-comments-for-a-repository)
+- [Commit comment reactions](https://docs.github.com/en/rest/reference/reactions?apiVersion=2022-11-28#list-reactions-for-a-commit-comment)
+- [Commit comments](https://docs.github.com/en/rest/commits/comments?apiVersion=2022-11-28#list-commit-comments-for-a-repository)
+- [Commits](https://docs.github.com/en/rest/commits/commits?apiVersion=2022-11-28#list-commits)
+- [Deployments](https://docs.github.com/en/rest/deployments/deployments?apiVersion=2022-11-28#list-deployments)
+- [Events](https://docs.github.com/en/rest/activity/events?apiVersion=2022-11-28#list-repository-events)
+- [Issue comment reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-an-issue-comment)
+- [Issue events](https://docs.github.com/en/rest/issues/events?apiVersion=2022-11-28#list-issue-events-for-a-repository)
+- [Issue milestones](https://docs.github.com/en/rest/issues/milestones?apiVersion=2022-11-28#list-milestones)
+- [Issue reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-an-issue)
+- [Issues](https://docs.github.com/en/rest/issues/issues?apiVersion=2022-11-28#list-repository-issues)
+- [Project (Classic) cards](https://docs.github.com/en/rest/projects/cards?apiVersion=2022-11-28#list-project-cards)
+- [Project (Classic) columns](https://docs.github.com/en/rest/projects/columns?apiVersion=2022-11-28#list-project-columns)
+- [Projects (Classic)](https://docs.github.com/en/rest/projects/projects?apiVersion=2022-11-28#list-repository-projects)
+- [ProjectsV2](https://docs.github.com/en/graphql/reference/objects#projectv2)
+- [Pull request comment reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-a-pull-request-review-comment)
+- [Pull request stats](https://docs.github.com/en/graphql/reference/objects#pullrequest)
+- [Pull requests](https://docs.github.com/en/rest/pulls/pulls?apiVersion=2022-11-28#list-pull-requests)
+- [Releases](https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#list-releases)
+- [Repositories](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-organization-repositories)
+- [Review comments](https://docs.github.com/en/rest/pulls/comments?apiVersion=2022-11-28#list-review-comments-in-a-repository)
+- [Reviews](https://docs.github.com/en/rest/pulls/reviews?apiVersion=2022-11-28#list-reviews-for-a-pull-request)
+- [Stargazers](https://docs.github.com/en/rest/activity/starring?apiVersion=2022-11-28#list-stargazers)
+- [WorkflowJobs](https://docs.github.com/en/rest/actions/workflow-jobs?apiVersion=2022-11-28#list-jobs-for-a-workflow-run)
+- [WorkflowRuns](https://docs.github.com/en/rest/actions/workflow-runs?apiVersion=2022-11-28#list-workflow-runs-for-a-repository)
+- [Workflows](https://docs.github.com/en/rest/actions/workflows?apiVersion=2022-11-28#list-repository-workflows)
+
+### Notes
+
+1. Only 4 of the streams listed above \(`comments`, `commits`, `issues` and `review comments`\) are pure incremental, meaning that they:
+
+ - read only new records;
+ - output only new records.
+
+2. Streams `workflow_runs` and `workflow_jobs` are almost pure incremental:
+
+ - read new records and some portion of old records (in past 30 days) [docs](https://docs.github.com/en/actions/managing-workflow-runs/re-running-workflows-and-jobs);
+ - the `workflow_jobs` depends on the `workflow_runs` to read the data, so they both follow the same logic [docs](https://docs.github.com/en/rest/actions/workflow-jobs#list-jobs-for-a-workflow-run);
+ - output only new records.
+
+3. Other 19 incremental streams are also incremental but with one difference, they:
+
+ - read all records;
+ - output only new records.
+ Please consider this behaviour when using those 19 incremental streams because it may affect your API call limits.
+
+4. Sometimes for large streams, specifying a very distant `start_date` in the past may result in repeated errors from GitHub instead of records \(a respective `WARN` log message will be output\). In this case, specifying a more recent `start_date` may help.
+ **The "Start date" configuration option does not apply to the streams below, because the GitHub API does not include dates which can be used for filtering:**
+
+- `assignees`
+- `branches`
+- `collaborators`
+- `issue_labels`
+- `organizations`
+- `pull_request_commits`
+- `pull_request_stats`
+- `repositories`
+- `tags`
+- `teams`
+- `users`
+
+## Limitations & Troubleshooting
+
+
+
+Expand to see details about GitHub connector limitations and troubleshooting.
+
+
+### Connector limitations
+
+#### Rate limiting
+The GitHub connector should not run into GitHub API limitations under normal usage. Please [create an issue](https://github.com/airbytehq/airbyte/issues) if you see any rate limit issues that are not automatically retried successfully. Refer to GitHub article [Rate limits for the REST API](https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api).
+
+#### Permissions and scopes
+
+If you use the OAuth authentication method, the OAuth2.0 application requests the following list of [scopes](https://docs.github.com/en/developers/apps/building-oauth-apps/scopes-for-oauth-apps#available-scopes): **repo**, **read:org**, **read:repo_hook**, **read:user**, **read:discussion**, **workflow**. For a [personal access token](https://github.com/settings/tokens), you need to manually select the needed scopes.
+
+Your token should have at least the `repo` scope. Depending on which streams you want to sync, the user generating the token needs more permissions:
+
+- For syncing Collaborators, the user which generates the personal access token must be a collaborator. To become a collaborator, they must be invited by an owner. If there are no collaborators, no records will be synced. Read more about access permissions [here](https://docs.github.com/en/get-started/learning-about-github/access-permissions-on-github).
+- Syncing [Teams](https://docs.github.com/en/organizations/organizing-members-into-teams/about-teams) is only available to authenticated members of a team's [organization](https://docs.github.com/en/rest/orgs). [Personal user accounts](https://docs.github.com/en/get-started/learning-about-github/types-of-github-accounts) and repositories belonging to them don't have access to Teams features. In this case no records will be synced.
+- To sync the Projects stream, the repository must have the Projects feature enabled.
+
+### Troubleshooting
+
+* Check out common troubleshooting issues for the GitHub source connector on our [Airbyte Forum](https://github.com/airbytehq/airbyte/discussions)
+
+
+
+## Changelog
+
+| Version | Date | Pull Request | Subject |
+|:--------|:-----------|:------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| 1.5.5 | 2023-12-26 | [33783](https://github.com/airbytehq/airbyte/pull/33783) | Fix retry for 504 error in GraphQL based streams |
+| 1.5.4 | 2023-11-20 | [32679](https://github.com/airbytehq/airbyte/pull/32679) | Return AirbyteMessage if max retry exceeded for 202 status code |
+| 1.5.3 | 2023-10-23 | [31702](https://github.com/airbytehq/airbyte/pull/31702) | Base image migration: remove Dockerfile and use the python-connector-base image |
+| 1.5.2 | 2023-10-13 | [31386](https://github.com/airbytehq/airbyte/pull/31386) | Handle `ContributorActivity` continuous `ACCEPTED` response |
+| 1.5.1 | 2023-10-12 | [31307](https://github.com/airbytehq/airbyte/pull/31307) | Increase backoff_time for stream `ContributorActivity` |
+| 1.5.0 | 2023-10-11 | [31300](https://github.com/airbytehq/airbyte/pull/31300) | Update Schemas: Add date-time format to fields |
+| 1.4.6 | 2023-10-04 | [31056](https://github.com/airbytehq/airbyte/pull/31056) | Migrate spec properties' `repository` and `branch` type to \<array\> |
+| 1.4.5 | 2023-10-02 | [31023](https://github.com/airbytehq/airbyte/pull/31023) | Increase backoff for stream `Contributor Activity` |
+| 1.4.4 | 2023-10-02 | [30971](https://github.com/airbytehq/airbyte/pull/30971) | Mark `start_date` as optional. |
+| 1.4.3 | 2023-10-02 | [30979](https://github.com/airbytehq/airbyte/pull/30979) | Fetch archived records in `Project Cards` |
+| 1.4.2 | 2023-09-30 | [30927](https://github.com/airbytehq/airbyte/pull/30927) | Provide actionable user error messages |
+| 1.4.1 | 2023-09-30 | [30839](https://github.com/airbytehq/airbyte/pull/30839) | Update CDK to Latest version |
+| 1.4.0 | 2023-09-29 | [30823](https://github.com/airbytehq/airbyte/pull/30823) | Add new stream `issue Timeline Events` |
+| 1.3.1 | 2023-09-28 | [30824](https://github.com/airbytehq/airbyte/pull/30824) | Handle empty response in stream `ContributorActivity` |
+| 1.3.0 | 2023-09-25 | [30731](https://github.com/airbytehq/airbyte/pull/30731) | Add new stream `ProjectsV2` |
+| 1.2.1 | 2023-09-22 | [30693](https://github.com/airbytehq/airbyte/pull/30693) | Handle 404 error in `TeamMemberShips` |
+| 1.2.0 | 2023-09-22 | [30647](https://github.com/airbytehq/airbyte/pull/30647) | Add support for self-hosted GitHub instances |
+| 1.1.1 | 2023-09-21 | [30654](https://github.com/airbytehq/airbyte/pull/30654) | Rewrite source connection error messages |
+| 1.1.0 | 2023-08-03 | [30615](https://github.com/airbytehq/airbyte/pull/30615) | Add new stream `Contributor Activity` |
+| 1.0.4 | 2023-08-03 | [29031](https://github.com/airbytehq/airbyte/pull/29031) | Reverted `advancedAuth` spec changes |
+| 1.0.3 | 2023-08-01 | [28910](https://github.com/airbytehq/airbyte/pull/28910) | Updated `advancedAuth` broken references |
+| 1.0.2 | 2023-07-11 | [28144](https://github.com/airbytehq/airbyte/pull/28144) | Add `archived_at` property to `Organizations` schema parameter |
+| 1.0.1 | 2023-05-22 | [25838](https://github.com/airbytehq/airbyte/pull/25838) | Deprecate "page size" input parameter |
+| 1.0.0 | 2023-05-19 | [25778](https://github.com/airbytehq/airbyte/pull/25778) | Improve repo(s) name validation on UI |
+| 0.5.0 | 2023-05-16 | [25793](https://github.com/airbytehq/airbyte/pull/25793) | Implement client-side throttling of requests |
+| 0.4.11 | 2023-05-12 | [26025](https://github.com/airbytehq/airbyte/pull/26025) | Added more transparent depiction of the personal access token expired |
+| 0.4.10 | 2023-05-15 | [26075](https://github.com/airbytehq/airbyte/pull/26075) | Add more specific error message description for no repos case. |
+| 0.4.9 | 2023-05-01 | [24523](https://github.com/airbytehq/airbyte/pull/24523) | Add undeclared columns to spec |
+| 0.4.8 | 2023-04-19 | [00000](https://github.com/airbytehq/airbyte/pull/25312) | Fix repo name validation |
+| 0.4.7 | 2023-03-24 | [24457](https://github.com/airbytehq/airbyte/pull/24457) | Add validation and transformation for repositories config |
+| 0.4.6 | 2023-03-24 | [24398](https://github.com/airbytehq/airbyte/pull/24398) | Fix caching for `get_starting_point` in stream "Commits" |
+| 0.4.5 | 2023-03-23 | [24417](https://github.com/airbytehq/airbyte/pull/24417) | Add pattern_descriptors to fields with an expected format |
+| 0.4.4 | 2023-03-17 | [24255](https://github.com/airbytehq/airbyte/pull/24255) | Add field groups and titles to improve display of connector setup form |
+| 0.4.3 | 2023-03-04 | [22993](https://github.com/airbytehq/airbyte/pull/22993) | Specified date formatting in specification |
+| 0.4.2 | 2023-03-03 | [23467](https://github.com/airbytehq/airbyte/pull/23467) | added user friendly messages, added AirbyteTracedException config_error, updated SAT |
+| 0.4.1 | 2023-01-27 | [22039](https://github.com/airbytehq/airbyte/pull/22039) | Set `AvailabilityStrategy` for streams explicitly to `None` |
+| 0.4.0 | 2023-01-20 | [21457](https://github.com/airbytehq/airbyte/pull/21457) | Use GraphQL for `issue_reactions` stream |
+| 0.3.12 | 2023-01-18 | [21481](https://github.com/airbytehq/airbyte/pull/21481) | Handle 502 Bad Gateway error with proper log message |
+| 0.3.11 | 2023-01-06 | [21084](https://github.com/airbytehq/airbyte/pull/21084) | Raise Error if no organizations or repos are available during read |
+| 0.3.10 | 2022-12-15 | [20523](https://github.com/airbytehq/airbyte/pull/20523) | Revert changes from 0.3.9 |
+| 0.3.9 | 2022-12-14 | [19978](https://github.com/airbytehq/airbyte/pull/19978) | Update CDK dependency; move custom HTTPError handling into `AvailabilityStrategy` classes |
+| 0.3.8 | 2022-11-10 | [19299](https://github.com/airbytehq/airbyte/pull/19299) | Fix events and workflow_runs datetimes |
+| 0.3.7 | 2022-10-20 | [18213](https://github.com/airbytehq/airbyte/pull/18213) | Skip retry on HTTP 200 |
+| 0.3.6 | 2022-10-11 | [17852](https://github.com/airbytehq/airbyte/pull/17852) | Use default behaviour, retry on 429 and all 5XX errors |
+| 0.3.5 | 2022-10-07 | [17715](https://github.com/airbytehq/airbyte/pull/17715) | Improve 502 handling for `comments` stream |
+| 0.3.4 | 2022-10-04 | [17555](https://github.com/airbytehq/airbyte/pull/17555) | Skip repository if got HTTP 500 for WorkflowRuns stream |
+| 0.3.3 | 2022-09-28 | [17287](https://github.com/airbytehq/airbyte/pull/17287) | Fix problem with "null" `cursor_field` for WorkflowJobs stream |
+| 0.3.2 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state. |
+| 0.3.1 | 2022-09-21 | [16947](https://github.com/airbytehq/airbyte/pull/16947) | Improve error logging when handling HTTP 500 error |
+| 0.3.0 | 2022-09-09 | [16534](https://github.com/airbytehq/airbyte/pull/16534) | Add new stream `WorkflowJobs` |
+| 0.2.46 | 2022-08-17 | [15730](https://github.com/airbytehq/airbyte/pull/15730) | Validate input organizations and repositories |
+| 0.2.45 | 2022-08-11 | [15420](https://github.com/airbytehq/airbyte/pull/15420) | "User" object can be "null" |
+| 0.2.44 | 2022-08-01 | [14795](https://github.com/airbytehq/airbyte/pull/14795) | Use GraphQL for `pull_request_comment_reactions` stream |
+| 0.2.43 | 2022-07-26 | [15049](https://github.com/airbytehq/airbyte/pull/15049) | Bugfix schemas for streams `deployments`, `workflow_runs`, `teams` |
+| 0.2.42 | 2022-07-12 | [14613](https://github.com/airbytehq/airbyte/pull/14613) | Improve schema for stream `pull_request_commits` added "null" |
+| 0.2.41 | 2022-07-03 | [14376](https://github.com/airbytehq/airbyte/pull/14376) | Add Retry for GraphQL API Resource limitations |
+| 0.2.40 | 2022-07-01 | [14338](https://github.com/airbytehq/airbyte/pull/14338) | Revert: "Rename field `mergeable` to `is_mergeable`" |
+| 0.2.39 | 2022-06-30 | [14274](https://github.com/airbytehq/airbyte/pull/14274) | Rename field `mergeable` to `is_mergeable` |
+| 0.2.38 | 2022-06-27 | [13989](https://github.com/airbytehq/airbyte/pull/13989) | Use GraphQL for `reviews` stream |
+| 0.2.37 | 2022-06-21 | [13955](https://github.com/airbytehq/airbyte/pull/13955) | Fix "secondary rate limit" not retrying |
+| 0.2.36 | 2022-06-20 | [13926](https://github.com/airbytehq/airbyte/pull/13926) | Break point added for `workflows_runs` stream |
+| 0.2.35 | 2022-06-16 | [13763](https://github.com/airbytehq/airbyte/pull/13763) | Use GraphQL for `pull_request_stats` stream |
+| 0.2.34 | 2022-06-14 | [13707](https://github.com/airbytehq/airbyte/pull/13707) | Fix API sorting, fix `get_starting_point` caching |
+| 0.2.33 | 2022-06-08 | [13558](https://github.com/airbytehq/airbyte/pull/13558) | Enable caching only for parent streams |
+| 0.2.32 | 2022-06-07 | [13531](https://github.com/airbytehq/airbyte/pull/13531) | Fix different result from `get_starting_point` when reading by pages |
+| 0.2.31 | 2022-05-24 | [13115](https://github.com/airbytehq/airbyte/pull/13115) | Add incremental support for streams `WorkflowRuns` |
+| 0.2.30 | 2022-05-09 | [12294](https://github.com/airbytehq/airbyte/pull/12294) | Add incremental support for streams `CommitCommentReactions`, `IssueCommentReactions`, `IssueReactions`, `PullRequestCommentReactions`, `Repositories`, `Workflows` |
+| 0.2.29 | 2022-05-04 | [12482](https://github.com/airbytehq/airbyte/pull/12482) | Update input configuration copy |
+| 0.2.28 | 2022-04-21 | [11893](https://github.com/airbytehq/airbyte/pull/11893) | Add new streams `TeamMembers`, `TeamMemberships` |
+| 0.2.27 | 2022-04-02 | [11678](https://github.com/airbytehq/airbyte/pull/11678) | Fix "PAT Credentials" in spec |
+| 0.2.26 | 2022-03-31 | [11623](https://github.com/airbytehq/airbyte/pull/11623) | Re-factored incremental sync for `Reviews` stream |
+| 0.2.25 | 2022-03-31 | [11567](https://github.com/airbytehq/airbyte/pull/11567) | Improve code for better error handling |
+| 0.2.24 | 2022-03-30 | [9251](https://github.com/airbytehq/airbyte/pull/9251) | Add Streams Workflow and WorkflowRuns |
+| 0.2.23 | 2022-03-17 | [11212](https://github.com/airbytehq/airbyte/pull/11212) | Improve documentation and spec for Beta |
+| 0.2.22 | 2022-03-10 | [10878](https://github.com/airbytehq/airbyte/pull/10878) | Fix error handling for unavailable streams with 404 status code |
+| 0.2.21 | 2022-03-04 | [10749](https://github.com/airbytehq/airbyte/pull/10749) | Add new stream `ProjectCards` |
+| 0.2.20 | 2022-02-16 | [10385](https://github.com/airbytehq/airbyte/pull/10385) | Add new stream `Deployments`, `ProjectColumns`, `PullRequestCommits` |
+| 0.2.19 | 2022-02-07 | [10211](https://github.com/airbytehq/airbyte/pull/10211) | Add human-readable error in case of incorrect organization or repo name |
+| 0.2.18 | 2021-02-09 | [10193](https://github.com/airbytehq/airbyte/pull/10193) | Add handling secondary rate limits |
+| 0.2.17 | 2021-02-02 | [9999](https://github.com/airbytehq/airbyte/pull/9999) | Remove BAD_GATEWAY code from backoff_time |
+| 0.2.16 | 2021-02-02 | [9868](https://github.com/airbytehq/airbyte/pull/9868) | Add log message for streams that are restricted for OAuth. Update oauth scopes. |
+| 0.2.15 | 2021-01-26 | [9802](https://github.com/airbytehq/airbyte/pull/9802) | Add missing fields for auto_merge in pull request stream |
+| 0.2.14 | 2021-01-21 | [9664](https://github.com/airbytehq/airbyte/pull/9664) | Add custom pagination size for large streams |
+| 0.2.13 | 2021-01-20 | [9619](https://github.com/airbytehq/airbyte/pull/9619) | Fix logging for function `should_retry` |
+| 0.2.11 | 2021-01-17 | [9492](https://github.com/airbytehq/airbyte/pull/9492) | Remove optional parameter `Accept` for reactions streams to fix error with 502 HTTP status code in response |
+| 0.2.10 | 2021-01-03 | [7250](https://github.com/airbytehq/airbyte/pull/7250) | Use CDK caching and convert PR-related streams to incremental |
+| 0.2.9 | 2021-12-29 | [9179](https://github.com/airbytehq/airbyte/pull/9179) | Use default retry delays on server error responses |
+| 0.2.8 | 2021-12-07 | [8524](https://github.com/airbytehq/airbyte/pull/8524) | Update connector fields title/description |
+| 0.2.7 | 2021-12-06 | [8518](https://github.com/airbytehq/airbyte/pull/8518) | Add connection retry with GitHub |
+| 0.2.6 | 2021-11-24 | [8030](https://github.com/airbytehq/airbyte/pull/8030) | Support start date property for PullRequestStats and Reviews streams |
+| 0.2.5 | 2021-11-21 | [8170](https://github.com/airbytehq/airbyte/pull/8170) | Fix slow check connection for organizations with a lot of repos |
+| 0.2.4 | 2021-11-11 | [7856](https://github.com/airbytehq/airbyte/pull/7856) | Resolve $ref fields in some stream schemas |
+| 0.2.3 | 2021-10-06 | [6833](https://github.com/airbytehq/airbyte/pull/6833) | Fix config backward compatibility |
+| 0.2.2 | 2021-10-05 | [6761](https://github.com/airbytehq/airbyte/pull/6761) | Add OAuth workflow specification |
+| 0.2.1 | 2021-09-22 | [6223](https://github.com/airbytehq/airbyte/pull/6223) | Add option to pull commits from user-specified branches |
+| 0.2.0 | 2021-09-19 | [5898](https://github.com/airbytehq/airbyte/pull/5898) and [6227](https://github.com/airbytehq/airbyte/pull/6227) | Don't minimize any output fields & add better error handling |
+| 0.1.11 | 2021-09-15 | [5949](https://github.com/airbytehq/airbyte/pull/5949) | Add caching for all streams |
+| 0.1.10 | 2021-09-09 | [5860](https://github.com/airbytehq/airbyte/pull/5860) | Add reaction streams |
+| 0.1.9 | 2021-09-02 | [5788](https://github.com/airbytehq/airbyte/pull/5788) | Handling empty repository, check method using RepositoryStats stream |
+| 0.1.8 | 2021-09-01 | [5757](https://github.com/airbytehq/airbyte/pull/5757) | Add more streams |
+| 0.1.7 | 2021-08-27 | [5696](https://github.com/airbytehq/airbyte/pull/5696) | Handle negative backoff values |
+| 0.1.6 | 2021-08-18 | [5456](https://github.com/airbytehq/airbyte/pull/5223) | Add MultipleTokenAuthenticator |
+| 0.1.5 | 2021-08-18 | [5456](https://github.com/airbytehq/airbyte/pull/5456) | Fix set up validation |
+| 0.1.4 | 2021-08-13 | [5136](https://github.com/airbytehq/airbyte/pull/5136) | Support syncing multiple repositories/organizations |
+| 0.1.3 | 2021-08-03 | [5156](https://github.com/airbytehq/airbyte/pull/5156) | Extended existing schemas with `users` property for certain streams |
+| 0.1.2 | 2021-07-13 | [4708](https://github.com/airbytehq/airbyte/pull/4708) | Fix bug with IssueEvents stream and add handling for rate limiting |
+| 0.1.1 | 2021-07-07 | [4590](https://github.com/airbytehq/airbyte/pull/4590) | Fix schema in the `pull_request` stream |
+| 0.1.0 | 2021-07-06 | [4174](https://github.com/airbytehq/airbyte/pull/4174) | New Source: GitHub |
+
+
\ No newline at end of file
diff --git a/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/incorrect_not_all_structure.md b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/incorrect_not_all_structure.md
new file mode 100644
index 000000000000..b041e8cc78d4
--- /dev/null
+++ b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/incorrect_not_all_structure.md
@@ -0,0 +1,6 @@
+## GitHub
+
+## Prerequisites
+
+- Start Date - the start date to replicate your data.
+
diff --git a/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/invalid_links.md b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/invalid_links.md
new file mode 100644
index 000000000000..1efb99d700d7
--- /dev/null
+++ b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/invalid_links.md
@@ -0,0 +1,305 @@
+# GitHub
+
+
+
+This page contains the setup guide and reference information for the [GitHub](https://www.github.com) source connector.
+
+
+
+## Prerequisites
+
+- List of GitHub Repositories (and access for them in case they are private)
+
+
+**For Airbyte Cloud:**
+
+- OAuth
+- Personal Access Token (see [Permissions and scopes](https://docs.airbyte.com/integrations/sources/github#permissions-and-scopes))
+
+
+
+**For Airbyte Open Source:**
+
+- Personal Access Token (see [Permissions and scopes](https://docs.airbyte.com/integrations/sources/github#permissions-and-scopes))
+
+
+## Setup guide
+
+### Step 1: Set up GitHub
+
+Create a [GitHub Account](https://github.com).
+
+
+**Airbyte Open Source additional setup steps**
+
+Log into [GitHub](https://github.com) and then generate a [personal access token](https://github.com/settings/tokens-that_do_not_exist). To load balance your API quota consumption across multiple API tokens, input multiple tokens separated with `,`.
+
+
+### Step 2: Set up the GitHub connector in Airbyte
+
+
+**For Airbyte Cloud:**
+
+1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account.
+2. In the left navigation bar, click **Sources**.
+3. On the source selection page, select **GitHub** from the list of Sources.
+4. Add a name for your GitHub connector.
+5. To authenticate:
+
+
+ - **For Airbyte Cloud:** **Authenticate your GitHub account** to authorize your GitHub account. Airbyte will authenticate the GitHub account you are already logged in to. Please make sure you are logged into the right account.
+
+
+
+ - **For Airbyte Open Source:** Authenticate with **Personal Access Token**. To generate a personal access token, log into [GitHub](https://github.com) and then generate a [personal access token](https://github.com/settings/tokens). Enter your GitHub personal access token. To load balance your API quota consumption across multiple API tokens, input multiple tokens separated with `,`.
+
+
+6. **GitHub Repositories** - Enter a list of GitHub organizations/repositories, e.g. `airbytehq/airbyte` for single repository, `airbytehq/airbyte airbytehq/another-repo` for multiple repositories. If you want to specify the organization to receive data from all its repositories, then you should specify it according to the following example: `airbytehq/*`.
+
+:::caution
+Repositories with the wrong name or repositories that do not exist or have the wrong name format will be skipped with `WARN` message in the logs.
+:::
+
+7. **Start date (Optional)** - The date from which you'd like to replicate data for streams. For streams which support this configuration, only data generated on or after the start date will be replicated.
+
+- These streams will only sync records generated on or after the **Start Date**: `comments`, `commit_comment_reactions`, `commit_comments`, `commits`, `deployments`, `events`, `issue_comment_reactions`, `issue_events`, `issue_milestones`, `issue_reactions`, `issues`, `project_cards`, `project_columns`, `projects`, `pull_request_comment_reactions`, `pull_requests`, `pull_requeststats`, `releases`, `review_comments`, `reviews`, `stargazers`, `workflow_runs`, `workflows`.
+
+- The **Start Date** does not apply to the streams below and all data will be synced for these streams: `assignees`, `branches`, `collaborators`, `issue_labels`, `organizations`, `pull_request_commits`, `pull_request_stats`, `repositories`, `tags`, `teams`, `users`
+
+8. **Branch (Optional)** - List of GitHub repository branches to pull commits from, e.g. `airbytehq/airbyte/master`. If no branches are specified for a repository, the default branch will be pulled. (e.g. `airbytehq/airbyte/master airbytehq/airbyte/my-branch`).
+9. **Max requests per hour (Optional)** - The GitHub API allows for a maximum of 5,000 requests per hour (15,000 for Github Enterprise). You can specify a lower value to limit your use of the API quota. Refer to GitHub article [Rate limits for the REST API](https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api).
+
+
+
+## Supported sync modes
+
+The GitHub source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts/#connection-sync-modes):
+
+- [Full Refresh - Overwrite](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-overwrite/)
+- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append)
+- [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append)
+- [Incremental Sync - Append + Deduped](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append-deduped)
+
+## Supported Streams
+
+This connector outputs the following full refresh streams:
+
+- [Assignees](https://docs.github.com/en/rest/issues/assignees?apiVersion=2022-11-28#list-assignees)
+- [Branches](https://docs.github.com/en/rest/branches/branches?apiVersion=2022-11-28#list-branches)
+- [Contributor Activity](https://docs.github.com/en/rest/metrics/statistics?apiVersion=2022-11-28#get-all-contributor-commit-activity)
+- [Collaborators](https://docs.github.com/en/rest/collaborators/collaborators?apiVersion=2022-11-28#list-repository-collaborators)
+- [Issue labels](https://docs.github.com/en/rest/issues/labels?apiVersion=2022-11-28#list-labels-for-a-repository)
+- [Organizations](https://docs.github.com/en/rest/orgs/orgs?apiVersion=2022-11-28#list-organizations)
+- [Pull request commits](https://docs.github.com/en/rest/pulls/pulls?apiVersion=2022-11-28#list-commits-on-a-pull-request)
+- [Tags](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-repository-tags)
+- [TeamMembers](https://docs.github.com/en/rest/teams/members?apiVersion=2022-11-28#list-team-members)
+- [TeamMemberships](https://docs.github.com/en/rest/teams/members?apiVersion=2022-11-28#get-team-membership-for-a-user)
+- [Teams](https://docs.github.com/en/rest/teams/teams?apiVersion=2022-11-28#list-teams)
+- [Users](https://docs.github.com/en/rest/orgs/members?apiVersion=2022-11-28#list-organization-members)
+- [Issue timeline events](https://docs.github.com/en/rest/issues/timeline?apiVersion=2022-11-28#list-timeline-events-for-an-issue)
+
+This connector outputs the following incremental streams:
+
+- [Comments](https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#list-issue-comments-for-a-repository)
+- [Commit comment reactions](https://docs.github.com/en/rest/reference/reactions?apiVersion=2022-11-28#list-reactions-for-a-commit-comment)
+- [Commit comments](https://docs.github.com/en/rest/commits/comments?apiVersion=2022-11-28#list-commit-comments-for-a-repository)
+- [Commits](https://docs.github.com/en/rest/commits/commits?apiVersion=2022-11-28#list-commits)
+- [Deployments](https://docs.github.com/en/rest/deployments/deployments?apiVersion=2022-11-28#list-deployments)
+- [Events](https://docs.github.com/en/rest/activity/events?apiVersion=2022-11-28#list-repository-events)
+- [Issue comment reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-an-issue-comment)
+- [Issue events](https://docs.github.com/en/rest/issues/events?apiVersion=2022-11-28#list-issue-events-for-a-repository)
+- [Issue milestones](https://docs.github.com/en/rest/issues/milestones?apiVersion=2022-11-28#list-milestones)
+- [Issue reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-an-issue)
+- [Issues](https://docs.github.com/en/rest/issues/issues?apiVersion=2022-11-28#list-repository-issues)
+- [Project (Classic) cards](https://docs.github.com/en/rest/projects/cards?apiVersion=2022-11-28#list-project-cards)
+- [Project (Classic) columns](https://docs.github.com/en/rest/projects/columns?apiVersion=2022-11-28#list-project-columns)
+- [Projects (Classic)](https://docs.github.com/en/rest/projects/projects?apiVersion=2022-11-28#list-repository-projects)
+- [ProjectsV2](https://docs.github.com/en/graphql/reference/objects#projectv2)
+- [Pull request comment reactions](https://docs.github.com/en/rest/reactions/reactions?apiVersion=2022-11-28#list-reactions-for-a-pull-request-review-comment)
+- [Pull request stats](https://docs.github.com/en/graphql/reference/objects#pullrequest)
+- [Pull requests](https://docs.github.com/en/rest/pulls/pulls?apiVersion=2022-11-28#list-pull-requests)
+- [Releases](https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#list-releases)
+- [Repositories](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-organization-repositories)
+- [Review comments](https://docs.github.com/en/rest/pulls/comments?apiVersion=2022-11-28#list-review-comments-in-a-repository)
+- [Reviews](https://docs.github.com/en/rest/pulls/reviews?apiVersion=2022-11-28#list-reviews-for-a-pull-request)
+- [Stargazers](https://docs.github.com/en/rest/activity/starring?apiVersion=2022-11-28#list-stargazers)
+- [WorkflowJobs](https://docs.github.com/pt/rest/actions/workflow-jobs?apiVersion=2022-11-28#list-jobs-for-a-workflow-run)
+- [WorkflowRuns](https://docs.github.com/en/rest/actions/workflow-runs?apiVersion=2022-11-28#list-workflow-runs-for-a-repository)
+- [Workflows](https://docs.github.com/en/rest/actions/workflows?apiVersion=2022-11-28#list-repository-workflows)
+
+### Notes
+
+1. Only 4 of the streams listed above \(`comments`, `commits`, `issues` and `review comments`\) are pure incremental, meaning that they:
+
+ - read only new records;
+ - output only new records.
+
+2. Streams `workflow_runs` and `workflow_jobs` are almost pure incremental:
+
+ - read new records and some portion of old records (in past 30 days) [docs](https://docs.github.com/en/actions/managing-workflow-runs/re-running-workflows-and-jobs);
+ - the `workflow_jobs` depends on the `workflow_runs` to read the data, so they both follow the same logic [docs](https://docs.github.com/pt/rest/actions/workflow-jobs#list-jobs-for-a-workflow-run);
+ - output only new records.
+
+3. Other 19 incremental streams are also incremental but with one difference, they:
+
+ - read all records;
+ - output only new records.
+ Please consider this behaviour when using those 19 incremental streams because it may affect your API call limits.
+
+4. Sometimes for large streams, specifying a very distant `start_date` in the past may result in repeated errors from GitHub instead of records \(a respective `WARN` log message will be output\). In this case, specifying a more recent `start_date` may help.
+ **The "Start date" configuration option does not apply to the streams below, because the GitHub API does not include dates which can be used for filtering:**
+
+- `assignees`
+- `branches`
+- `collaborators`
+- `issue_labels`
+- `organizations`
+- `pull_request_commits`
+- `pull_request_stats`
+- `repositories`
+- `tags`
+- `teams`
+- `users`
+
+## Limitations & Troubleshooting
+
+
+
+Expand to see details about GitHub connector limitations and troubleshooting.
+
+
+### Connector limitations
+
+#### Rate limiting
+The GitHub connector should not run into GitHub API limitations under normal usage. Please [create an issue](https://github.com/airbytehq/airbyte/issues) if you see any rate limit issues that are not automatically retried successfully. Refer to GitHub article [Rate limits for the REST API](https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api).
+
+#### Permissions and scopes
+
+If you use the OAuth authentication method, the OAuth2.0 application requests the following list of [scopes](https://docs.github.com/en/developers/apps/building-oauth-apps/scopes-for-oauth-apps#available-scopes): **repo**, **read:org**, **read:repo_hook**, **read:user**, **read:discussion**, **workflow**. For a [personal access token](https://github.com/settings/tokens), you need to manually select the needed scopes.
+
+Your token should have at least the `repo` scope. Depending on which streams you want to sync, the user generating the token needs more permissions:
+
+- For syncing Collaborators, the user which generates the personal access token must be a collaborator. To become a collaborator, they must be invited by an owner. If there are no collaborators, no records will be synced. Read more about access permissions [here](https://docs.github.com/en/get-started/learning-about-github/access-permissions-on-github).
+- Syncing [Teams](https://docs.github.com/en/organizations/organizing-members-into-teams/about-teams_do_not_exists) is only available to authenticated members of a team's [organization](https://docs.github.com/en/rest/orgs). [Personal user accounts](https://docs.github.com/en/get-started/learning-about-github/types-of-github-accounts) and repositories belonging to them don't have access to Teams features. In this case no records will be synced.
+- To sync the Projects stream, the repository must have the Projects feature enabled.
+
+### Troubleshooting
+
+* Check out common troubleshooting issues for the GitHub source connector on our [Airbyte Forum](https://github.com/airbytehq/airbyte/discussions)
+
+
+
+## Changelog
+
+| Version | Date | Pull Request | Subject |
+|:--------|:-----------|:------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| 1.5.5 | 2023-12-26 | [33783](https://github.com/airbytehq/airbyte/pull/33783) | Fix retry for 504 error in GraphQL based streams |
+| 1.5.4 | 2023-11-20 | [32679](https://github.com/airbytehq/airbyte/pull/32679) | Return AirbyteMessage if max retry exceeded for 202 status code |
+| 1.5.3 | 2023-10-23 | [31702](https://github.com/airbytehq/airbyte/pull/31702) | Base image migration: remove Dockerfile and use the python-connector-base image |
+| 1.5.2 | 2023-10-13 | [31386](https://github.com/airbytehq/airbyte/pull/31386) | Handle `ContributorActivity` continuous `ACCEPTED` response |
+| 1.5.1 | 2023-10-12 | [31307](https://github.com/airbytehq/airbyte/pull/31307) | Increase backoff_time for stream `ContributorActivity` |
+| 1.5.0 | 2023-10-11 | [31300](https://github.com/airbytehq/airbyte/pull/31300) | Update Schemas: Add date-time format to fields |
+| 1.4.6 | 2023-10-04 | [31056](https://github.com/airbytehq/airbyte/pull/31056) | Migrate spec properties' `repository` and `branch` type to \<array\> |
+| 1.4.5 | 2023-10-02 | [31023](https://github.com/airbytehq/airbyte/pull/31023) | Increase backoff for stream `Contributor Activity` |
+| 1.4.4 | 2023-10-02 | [30971](https://github.com/airbytehq/airbyte/pull/30971) | Mark `start_date` as optional. |
+| 1.4.3 | 2023-10-02 | [30979](https://github.com/airbytehq/airbyte/pull/30979) | Fetch archived records in `Project Cards` |
+| 1.4.2 | 2023-09-30 | [30927](https://github.com/airbytehq/airbyte/pull/30927) | Provide actionable user error messages |
+| 1.4.1 | 2023-09-30 | [30839](https://github.com/airbytehq/airbyte/pull/30839) | Update CDK to Latest version |
+| 1.4.0 | 2023-09-29 | [30823](https://github.com/airbytehq/airbyte/pull/30823) | Add new stream `issue Timeline Events` |
+| 1.3.1 | 2023-09-28 | [30824](https://github.com/airbytehq/airbyte/pull/30824) | Handle empty response in stream `ContributorActivity` |
+| 1.3.0 | 2023-09-25 | [30731](https://github.com/airbytehq/airbyte/pull/30731) | Add new stream `ProjectsV2` |
+| 1.2.1 | 2023-09-22 | [30693](https://github.com/airbytehq/airbyte/pull/30693) | Handle 404 error in `TeamMemberShips` |
+| 1.2.0 | 2023-09-22 | [30647](https://github.com/airbytehq/airbyte/pull/30647) | Add support for self-hosted GitHub instances |
+| 1.1.1 | 2023-09-21 | [30654](https://github.com/airbytehq/airbyte/pull/30654) | Rewrite source connection error messages |
+| 1.1.0 | 2023-08-03 | [30615](https://github.com/airbytehq/airbyte/pull/30615) | Add new stream `Contributor Activity` |
+| 1.0.4 | 2023-08-03 | [29031](https://github.com/airbytehq/airbyte/pull/29031) | Reverted `advancedAuth` spec changes |
+| 1.0.3 | 2023-08-01 | [28910](https://github.com/airbytehq/airbyte/pull/28910) | Updated `advancedAuth` broken references |
+| 1.0.2 | 2023-07-11 | [28144](https://github.com/airbytehq/airbyte/pull/28144) | Add `archived_at` property to `Organizations` schema parameter |
+| 1.0.1 | 2023-05-22 | [25838](https://github.com/airbytehq/airbyte/pull/25838) | Deprecate "page size" input parameter |
+| 1.0.0 | 2023-05-19 | [25778](https://github.com/airbytehq/airbyte/pull/25778) | Improve repo(s) name validation on UI |
+| 0.5.0 | 2023-05-16 | [25793](https://github.com/airbytehq/airbyte/pull/25793) | Implement client-side throttling of requests |
+| 0.4.11 | 2023-05-12 | [26025](https://github.com/airbytehq/airbyte/pull/26025) | Added more transparent depiction of the personal access token expired |
+| 0.4.10 | 2023-05-15 | [26075](https://github.com/airbytehq/airbyte/pull/26075) | Add more specific error message description for no repos case. |
+| 0.4.9 | 2023-05-01 | [24523](https://github.com/airbytehq/airbyte/pull/24523) | Add undeclared columns to spec |
+| 0.4.8 | 2023-04-19 | [25312](https://github.com/airbytehq/airbyte/pull/25312) | Fix repo name validation |
+| 0.4.7 | 2023-03-24 | [24457](https://github.com/airbytehq/airbyte/pull/24457) | Add validation and transformation for repositories config |
+| 0.4.6 | 2023-03-24 | [24398](https://github.com/airbytehq/airbyte/pull/24398) | Fix caching for `get_starting_point` in stream "Commits" |
+| 0.4.5 | 2023-03-23 | [24417](https://github.com/airbytehq/airbyte/pull/24417) | Add pattern_descriptors to fields with an expected format |
+| 0.4.4 | 2023-03-17 | [24255](https://github.com/airbytehq/airbyte/pull/24255) | Add field groups and titles to improve display of connector setup form |
+| 0.4.3 | 2023-03-04 | [22993](https://github.com/airbytehq/airbyte/pull/22993) | Specified date formatting in specification |
+| 0.4.2 | 2023-03-03 | [23467](https://github.com/airbytehq/airbyte/pull/23467) | added user friendly messages, added AirbyteTracedException config_error, updated SAT |
+| 0.4.1 | 2023-01-27 | [22039](https://github.com/airbytehq/airbyte/pull/22039) | Set `AvailabilityStrategy` for streams explicitly to `None` |
+| 0.4.0 | 2023-01-20 | [21457](https://github.com/airbytehq/airbyte/pull/21457) | Use GraphQL for `issue_reactions` stream |
+| 0.3.12 | 2023-01-18 | [21481](https://github.com/airbytehq/airbyte/pull/21481) | Handle 502 Bad Gateway error with proper log message |
+| 0.3.11 | 2023-01-06 | [21084](https://github.com/airbytehq/airbyte/pull/21084) | Raise Error if no organizations or repos are available during read |
+| 0.3.10 | 2022-12-15 | [20523](https://github.com/airbytehq/airbyte/pull/20523) | Revert changes from 0.3.9 |
+| 0.3.9 | 2022-12-14 | [19978](https://github.com/airbytehq/airbyte/pull/19978) | Update CDK dependency; move custom HTTPError handling into `AvailabilityStrategy` classes |
+| 0.3.8 | 2022-11-10 | [19299](https://github.com/airbytehq/airbyte/pull/19299) | Fix events and workflow_runs datetimes |
+| 0.3.7 | 2022-10-20 | [18213](https://github.com/airbytehq/airbyte/pull/18213) | Skip retry on HTTP 200 |
+| 0.3.6 | 2022-10-11 | [17852](https://github.com/airbytehq/airbyte/pull/17852) | Use default behaviour, retry on 429 and all 5XX errors |
+| 0.3.5 | 2022-10-07 | [17715](https://github.com/airbytehq/airbyte/pull/17715) | Improve 502 handling for `comments` stream |
+| 0.3.4 | 2022-10-04 | [17555](https://github.com/airbytehq/airbyte/pull/17555) | Skip repository if got HTTP 500 for WorkflowRuns stream |
+| 0.3.3 | 2022-09-28 | [17287](https://github.com/airbytehq/airbyte/pull/17287) | Fix problem with "null" `cursor_field` for WorkflowJobs stream |
+| 0.3.2 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state. |
+| 0.3.1 | 2022-09-21 | [16947](https://github.com/airbytehq/airbyte/pull/16947) | Improve error logging when handling HTTP 500 error |
+| 0.3.0 | 2022-09-09 | [16534](https://github.com/airbytehq/airbyte/pull/16534) | Add new stream `WorkflowJobs` |
+| 0.2.46 | 2022-08-17 | [15730](https://github.com/airbytehq/airbyte/pull/15730) | Validate input organizations and repositories |
+| 0.2.45 | 2022-08-11 | [15420](https://github.com/airbytehq/airbyte/pull/15420) | "User" object can be "null" |
+| 0.2.44 | 2022-08-01 | [14795](https://github.com/airbytehq/airbyte/pull/14795) | Use GraphQL for `pull_request_comment_reactions` stream |
+| 0.2.43 | 2022-07-26 | [15049](https://github.com/airbytehq/airbyte/pull/15049) | Bugfix schemas for streams `deployments`, `workflow_runs`, `teams` |
+| 0.2.42 | 2022-07-12 | [14613](https://github.com/airbytehq/airbyte/pull/14613) | Improve schema for stream `pull_request_commits` added "null" |
+| 0.2.41 | 2022-07-03 | [14376](https://github.com/airbytehq/airbyte/pull/14376) | Add Retry for GraphQL API Resource limitations |
+| 0.2.40 | 2022-07-01 | [14338](https://github.com/airbytehq/airbyte/pull/14338) | Revert: "Rename field `mergeable` to `is_mergeable`" |
+| 0.2.39 | 2022-06-30 | [14274](https://github.com/airbytehq/airbyte/pull/14274) | Rename field `mergeable` to `is_mergeable` |
+| 0.2.38 | 2022-06-27 | [13989](https://github.com/airbytehq/airbyte/pull/13989) | Use GraphQL for `reviews` stream |
+| 0.2.37 | 2022-06-21 | [13955](https://github.com/airbytehq/airbyte/pull/13955) | Fix "secondary rate limit" not retrying |
+| 0.2.36 | 2022-06-20 | [13926](https://github.com/airbytehq/airbyte/pull/13926) | Break point added for `workflows_runs` stream |
+| 0.2.35 | 2022-06-16 | [13763](https://github.com/airbytehq/airbyte/pull/13763) | Use GraphQL for `pull_request_stats` stream |
+| 0.2.34 | 2022-06-14 | [13707](https://github.com/airbytehq/airbyte/pull/13707) | Fix API sorting, fix `get_starting_point` caching |
+| 0.2.33 | 2022-06-08 | [13558](https://github.com/airbytehq/airbyte/pull/13558) | Enable caching only for parent streams |
+| 0.2.32 | 2022-06-07 | [13531](https://github.com/airbytehq/airbyte/pull/13531) | Fix different result from `get_starting_point` when reading by pages |
+| 0.2.31 | 2022-05-24 | [13115](https://github.com/airbytehq/airbyte/pull/13115) | Add incremental support for streams `WorkflowRuns` |
+| 0.2.30 | 2022-05-09 | [12294](https://github.com/airbytehq/airbyte/pull/12294) | Add incremental support for streams `CommitCommentReactions`, `IssueCommentReactions`, `IssueReactions`, `PullRequestCommentReactions`, `Repositories`, `Workflows` |
+| 0.2.29 | 2022-05-04 | [12482](https://github.com/airbytehq/airbyte/pull/12482) | Update input configuration copy |
+| 0.2.28 | 2022-04-21 | [11893](https://github.com/airbytehq/airbyte/pull/11893) | Add new streams `TeamMembers`, `TeamMemberships` |
+| 0.2.27 | 2022-04-02 | [11678](https://github.com/airbytehq/airbyte/pull/11678) | Fix "PAT Credentials" in spec |
+| 0.2.26 | 2022-03-31 | [11623](https://github.com/airbytehq/airbyte/pull/11623) | Re-factored incremental sync for `Reviews` stream |
+| 0.2.25 | 2022-03-31 | [11567](https://github.com/airbytehq/airbyte/pull/11567) | Improve code for better error handling |
+| 0.2.24 | 2022-03-30 | [9251](https://github.com/airbytehq/airbyte/pull/9251) | Add Streams Workflow and WorkflowRuns |
+| 0.2.23 | 2022-03-17 | [11212](https://github.com/airbytehq/airbyte/pull/11212) | Improve documentation and spec for Beta |
+| 0.2.22 | 2022-03-10 | [10878](https://github.com/airbytehq/airbyte/pull/10878) | Fix error handling for unavailable streams with 404 status code |
+| 0.2.21 | 2022-03-04 | [10749](https://github.com/airbytehq/airbyte/pull/10749) | Add new stream `ProjectCards` |
+| 0.2.20 | 2022-02-16 | [10385](https://github.com/airbytehq/airbyte/pull/10385) | Add new stream `Deployments`, `ProjectColumns`, `PullRequestCommits` |
+| 0.2.19 | 2022-02-07 | [10211](https://github.com/airbytehq/airbyte/pull/10211) | Add human-readable error in case of incorrect organization or repo name |
+| 0.2.18 | 2021-02-09 | [10193](https://github.com/airbytehq/airbyte/pull/10193) | Add handling secondary rate limits |
+| 0.2.17 | 2021-02-02 | [9999](https://github.com/airbytehq/airbyte/pull/9999) | Remove BAD_GATEWAY code from backoff_time |
+| 0.2.16 | 2021-02-02 | [9868](https://github.com/airbytehq/airbyte/pull/9868) | Add log message for streams that are restricted for OAuth. Update oauth scopes. |
+| 0.2.15 | 2021-01-26 | [9802](https://github.com/airbytehq/airbyte/pull/9802) | Add missing fields for auto_merge in pull request stream |
+| 0.2.14 | 2021-01-21 | [9664](https://github.com/airbytehq/airbyte/pull/9664) | Add custom pagination size for large streams |
+| 0.2.13 | 2021-01-20 | [9619](https://github.com/airbytehq/airbyte/pull/9619) | Fix logging for function `should_retry` |
+| 0.2.11 | 2021-01-17 | [9492](https://github.com/airbytehq/airbyte/pull/9492) | Remove optional parameter `Accept` for reactions streams to fix error with 502 HTTP status code in response |
+| 0.2.10 | 2021-01-03 | [7250](https://github.com/airbytehq/airbyte/pull/7250) | Use CDK caching and convert PR-related streams to incremental |
+| 0.2.9 | 2021-12-29 | [9179](https://github.com/airbytehq/airbyte/pull/9179) | Use default retry delays on server error responses |
+| 0.2.8 | 2021-12-07 | [8524](https://github.com/airbytehq/airbyte/pull/8524) | Update connector fields title/description |
+| 0.2.7 | 2021-12-06 | [8518](https://github.com/airbytehq/airbyte/pull/8518) | Add connection retry with GitHub |
+| 0.2.6 | 2021-11-24 | [8030](https://github.com/airbytehq/airbyte/pull/8030) | Support start date property for PullRequestStats and Reviews streams |
+| 0.2.5 | 2021-11-21 | [8170](https://github.com/airbytehq/airbyte/pull/8170) | Fix slow check connection for organizations with a lot of repos |
+| 0.2.4 | 2021-11-11 | [7856](https://github.com/airbytehq/airbyte/pull/7856) | Resolve $ref fields in some stream schemas |
+| 0.2.3 | 2021-10-06 | [6833](https://github.com/airbytehq/airbyte/pull/6833) | Fix config backward compatibility |
+| 0.2.2 | 2021-10-05 | [6761](https://github.com/airbytehq/airbyte/pull/6761) | Add oauth workflow specification |
+| 0.2.1 | 2021-09-22 | [6223](https://github.com/airbytehq/airbyte/pull/6223) | Add option to pull commits from user-specified branches |
+| 0.2.0 | 2021-09-19 | [5898](https://github.com/airbytehq/airbyte/pull/5898) and [6227](https://github.com/airbytehq/airbyte/pull/6227) | Don't minimize any output fields & add better error handling |
+| 0.1.11 | 2021-09-15 | [5949](https://github.com/airbytehq/airbyte/pull/5949) | Add caching for all streams |
+| 0.1.10 | 2021-09-09 | [5860](https://github.com/airbytehq/airbyte/pull/5860) | Add reaction streams |
+| 0.1.9 | 2021-09-02 | [5788](https://github.com/airbytehq/airbyte/pull/5788) | Handling empty repository, check method using RepositoryStats stream |
+| 0.1.8 | 2021-09-01 | [5757](https://github.com/airbytehq/airbyte/pull/5757) | Add more streams |
+| 0.1.7 | 2021-08-27 | [5696](https://github.com/airbytehq/airbyte/pull/5696) | Handle negative backoff values |
+| 0.1.6 | 2021-08-18 | [5223](https://github.com/airbytehq/airbyte/pull/5223) | Add MultipleTokenAuthenticator |
+| 0.1.5 | 2021-08-18 | [5456](https://github.com/airbytehq/airbyte/pull/5456) | Fix set up validation |
+| 0.1.4 | 2021-08-13 | [5136](https://github.com/airbytehq/airbyte/pull/5136) | Support syncing multiple repositories/organizations |
+| 0.1.3 | 2021-08-03 | [5156](https://github.com/airbytehq/airbyte/pull/5156) | Extended existing schemas with `users` property for certain streams |
+| 0.1.2 | 2021-07-13 | [4708](https://github.com/airbytehq/airbyte/pull/4708) | Fix bug with IssueEvents stream and add handling for rate limiting |
+| 0.1.1 | 2021-07-07 | [4590](https://github.com/airbytehq/airbyte/pull/4590) | Fix schema in the `pull_request` stream |
+| 0.1.0 | 2021-07-06 | [4174](https://github.com/airbytehq/airbyte/pull/4174) | New Source: GitHub |
+
+
\ No newline at end of file
diff --git a/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/with_not_required_steps.md b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/with_not_required_steps.md
new file mode 100644
index 000000000000..942837b08a73
--- /dev/null
+++ b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/data/docs/with_not_required_steps.md
@@ -0,0 +1,123 @@
+# Oracle Netsuite
+
+One unified business management suite, encompassing ERP/Financials, CRM and ecommerce for more than 31,000 customers.
+
+This connector implements the [SuiteTalk REST Web Services](https://docs.oracle.com/en/cloud/saas/netsuite/ns-online-help/chapter_1540391670.html) and uses REST API to fetch the customers data.
+
+## Prerequisites
+* Oracle NetSuite [account](https://system.netsuite.com/pages/customerlogin.jsp?country=US)
+* Allowed access to all Account permissions options
+
+## Setup guide
+### Step 1: Create Oracle NetSuite account
+
+1. Create [account](https://system.netsuite.com/pages/customerlogin.jsp?country=US) on Oracle NetSuite
+2. Confirm your Email
+
+### Step 2: Setup NetSuite account
+#### Step 2.1: Obtain Realm info
+1. Login into your NetSuite [account](https://system.netsuite.com/pages/customerlogin.jsp?country=US)
+2. Go to **Setup** » **Company** » **Company Information**
+3. Copy your Account ID (Realm). It should look like **1234567** for the `Production` env. or **1234567_SB2** - for a `Sandbox`
+#### Step 2.2: Enable features
+1. Go to **Setup** » **Company** » **Enable Features**
+2. Click on **SuiteCloud** tab
+3. Scroll down to **SuiteScript** section
+4. Enable checkbox for `CLIENT SUITESCRIPT` and `SERVER SUITESCRIPT`
+5. Scroll down to **Manage Authentication** section
+6. Enable checkbox `TOKEN-BASED AUTHENTICATION`
+7. Scroll down to **SuiteTalk (Web Services)**
+8. Enable checkbox `REST WEB SERVICES`
+9. Save the changes
+#### Step 2.3: Create Integration (obtain Consumer Key and Consumer Secret)
+1. Go to **Setup** » **Integration** » **Manage Integrations** » **New**
+2. Fill the **Name** field (we recommend to put `airbyte-rest-integration` for a name)
+3. Make sure the **State** is `enabled`
+4. Enable checkbox `Token-Based Authentication` in **Authentication** section
+5. Save changes
+6. After that, **Consumer Key** and **Consumer Secret** will be shown once (copy them to a safe place)
+#### Step 2.4: Setup Role
+1. Go to **Setup** » **Users/Roles** » **Manage Roles** » **New**
+2. Fill the **Name** field (we recommend to put `airbyte-integration-role` for a name)
+3. Scroll down to **Permissions** tab
+4. (REQUIRED) Click on `Transactions` and manually `add` all the dropdown entities with either `full` or `view` access level.
+5. (REQUIRED) Click on `Reports` and manually `add` all the dropdown entities with either `full` or `view` access level.
+6. (REQUIRED) Click on `Lists` and manually `add` all the dropdown entities with either `full` or `view` access level.
+7. (REQUIRED) Click on `Setup` and manually `add` all the dropdown entities with either `full` or `view` access level.
+* Make sure you've done all `REQUIRED` steps correctly, to avoid sync issues in the future.
+* Please edit these params again when you `rename` or `customise` any `Object` in Netsuite for `airbyte-integration-role` to reflect such changes.
+
+#### Step 2.5: Setup User
+1. Go to **Setup** » **Users/Roles** » **Manage Users**
+2. In column `Name` click on the user’s name you want to give access to the `airbyte-integration-role`
+3. Then click on **Edit** button under the user’s name
+4. Scroll down to **Access** tab at the bottom
+5. Select from dropdown list the `airbyte-integration-role` role which you created in step 2.4
+6. Save changes
+
+#### Step 2.6: Create Access Token for role
+1. Go to **Setup** » **Users/Roles** » **Access Tokens** » **New**
+2. Select an **Application Name**
+3. Under **User** select the user you assigned the `airbyte-integration-role` to in step **2.5**
+4. Inside **Role** select the one you gave to the user in the step **2.5**
+5. Under **Token Name** you can give a descriptive name to the Token you are creating (we recommend to put `airbyte-rest-integration-token` for a name)
+6. Save changes
+7. After that, **Token ID** and **Token Secret** will be shown once (copy them to a safe place)
+
+#### Step 2.7: Summary
+You have copied the following parameters:
+* Realm (Account ID)
+* Consumer Key
+* Consumer Secret
+* Token ID
+* Token Secret
+You have also properly **Configured Account** with **Correct Permissions** and **Access Token** for the User and Role you created earlier.
+
+### Step 3: Set up the source connector in Airbyte
+### For Airbyte Cloud:
+
+1. [Log into your Airbyte Cloud](https://cloud.airbyte.com/workspaces) account.
+2. In the left navigation bar, click **Sources**. In the top-right corner, click **+ new source**.
+3. On the source setup page, select **NetSuite** from the Source type dropdown and enter a name for this connector.
+4. Add **Realm**
+5. Add **Consumer Key**
+6. Add **Consumer Secret**
+7. Add **Token ID**
+8. Add **Token Secret**
+9. Click `Set up source`.
+
+### For Airbyte OSS:
+
+1. Go to local Airbyte page.
+2. In the left navigation bar, click **Sources**. In the top-right corner, click **+ new source**.
+3. On the source setup page, select **NetSuite** from the Source type dropdown and enter a name for this connector.
+4. Add **Realm**
+5. Add **Consumer Key**
+6. Add **Consumer Secret**
+7. Add **Token ID**
+8. Add **Token Secret**
+9. Click `Set up source`
+
+
+## Supported sync modes
+
+The NetSuite source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes):
+ - Full Refresh
+ - Incremental
+
+## Supported Streams
+
+- Streams are generated based on the `ROLE` and `USER` access to them, as well as the `Account` settings. Make sure you're using the correct role (in our case `airbyte-integration-role`, or any other custom `ROLE` granted to the Access Token) that has access to the NetSuite objects you want to sync. Please refer to **Setup guide** > **Step 2.4** and **Setup guide** > **Step 2.5**.
+
+
+## Performance considerations
+
+The connector is restricted by Netsuite [Concurrency Limit per Integration](https://docs.oracle.com/en/cloud/saas/netsuite/ns-online-help/bridgehead_156224824287.html).
+
+## Changelog
+
+| Version | Date | Pull Request | Subject |
+| :------ | :--------- | :------------------------------------------------------- | :-------------------------- |
+| 0.1.3 | 2023-01-20 | [21645](https://github.com/airbytehq/airbyte/pull/21645) | Minor issues fix, Setup Guide corrections for public docs |
+| 0.1.1 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state |
+| 0.1.0 | 2022-09-15 | [16093](https://github.com/airbytehq/airbyte/pull/16093) | Initial Alpha release |
diff --git a/airbyte-integrations/bases/connector-acceptance-test/unit_tests/test_documentation.py b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/test_documentation.py
new file mode 100644
index 000000000000..5a602e85a2e7
--- /dev/null
+++ b/airbyte-integrations/bases/connector-acceptance-test/unit_tests/test_documentation.py
@@ -0,0 +1,155 @@
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+
+from pathlib import Path
+
+import pytest
+from airbyte_protocol.models import ConnectorSpecification
+from connector_acceptance_test import conftest
+from connector_acceptance_test.tests.test_core import TestConnectorDocumentation as _TestConnectorDocumentation
+
+
+@pytest.mark.parametrize(
+    "connector_spec, docs_path, should_fail",
+    (
+        # SUCCESS: required field from spec exists in Prerequisites section
+        (
+            {"required": ["start_date"], "properties": {"start_date": {"title": "Start Date"}}},
+            "data/docs/incorrect_not_all_structure.md",
+            False
+        ),
+        # FAIL: required field from spec does not exist in Prerequisites section
+        (
+            {"required": ["access_token"], "properties": {"access_token": {"title": "Access Token"}}},
+            "data/docs/incorrect_not_all_structure.md",
+            True
+        )
+    )
+)
+def test_documentation_prerequisites_section(connector_spec, docs_path, should_fail):
+    """Run the Prerequisites-content check on a fixture doc; an AssertionError is expected only when `should_fail`."""
+    t = _TestConnectorDocumentation()
+    docs_path = Path(__file__).parent / docs_path  # fixture paths are relative to this test module
+    documentation = docs_path.read_text().rstrip()
+
+    if should_fail is True:
+        with pytest.raises(AssertionError):
+            t.test_prerequisites_content(True, ConnectorSpecification(connectionSpecification=connector_spec), documentation, docs_path)
+    else:
+        t.test_prerequisites_content(True, ConnectorSpecification(connectionSpecification=connector_spec), documentation, docs_path)
+
+
+@pytest.mark.parametrize(
+    "metadata, docs_path, should_fail, failure",
+    (
+        # FAIL: Docs does not have required headers from standard template
+        (
+            {"data": {"name": "GitHub"}},
+            "data/docs/incorrect_not_all_structure.md",
+            True,
+            "Missing headers:",
+        ),
+        # FAIL: Docs contain a heading that does not match the standard template
+        (
+            {"data": {"name": "Oracle Netsuite"}},
+            "data/docs/with_not_required_steps.md",
+            True,
+            "Actual Heading: 'Create Oracle NetSuite account'. Possible correct heading",
+        ),
+        # SUCCESS: Docs follow standard template
+        (
+            {"data": {"name": "GitHub"}},
+            "data/docs/correct.md",
+            False,
+            "",
+        ),
+        # FAIL: Incorrect header order
+        (
+            {"data": {"name": "GitHub"}},
+            "data/docs/incorrect_header_order.md",
+            True,
+            "Actual Heading: 'Prerequisites'. Expected Heading: 'GitHub'",
+        ),
+    )
+)
+def test_docs_structure_is_correct(mocker, metadata, docs_path, should_fail, failure):
+    """Run the heading-structure check on a fixture doc; failing cases must raise with a message matching `failure`."""
+    t = _TestConnectorDocumentation()
+
+    docs_path = Path(__file__).parent / docs_path  # fixture paths are relative to this test module
+    documentation = docs_path.read_text().rstrip()
+
+    if should_fail:
+        with pytest.raises(BaseException) as e:  # NOTE(review): presumably BaseException because the check reports via pytest.fail - confirm
+            t.test_docs_structure(True, documentation, metadata)
+        assert e.match(failure)  # must sit after the `with` block: inside it, this line would never run once the call raises
+    else:
+        t.test_docs_structure(True, documentation, metadata)
+
+
+@pytest.mark.parametrize(
+    "metadata, docs_path, should_fail",
+    (
+        # FAIL: Prerequisites section does not follow standard template
+        (
+            {"data": {"name": "GitHub"}},
+            "data/docs/incorrect_not_all_structure.md",
+            True,
+        ),
+        # SUCCESS: Section descriptions follow standard template
+        (
+            {"data": {"name": "GitHub"}},
+            "data/docs/correct.md",
+            False,
+        ),
+        # SUCCESS: Section descriptions follow standard template
+        (
+            {"data": {"name": "GitHub"}},
+            "data/docs/correct_all_description_exist.md",
+            False,
+        ),
+    )
+)
+def test_docs_description(mocker, metadata, docs_path, should_fail):
+    """Run the section-description check on a fixture doc; an AssertionError is expected only when `should_fail`."""
+    mocker.patch.object(conftest.pytest, "fail")  # pytest.fail is replaced by a mock for the duration of this test
+
+    t = _TestConnectorDocumentation()
+
+    docs_path = Path(__file__).parent / docs_path  # fixture paths are relative to this test module
+    documentation = docs_path.read_text().rstrip()
+
+    if should_fail is True:
+        with pytest.raises(AssertionError):
+            t.test_docs_descriptions(True, docs_path, documentation, metadata)
+    else:
+        t.test_docs_descriptions(True, docs_path, documentation, metadata)
+
+
+@pytest.mark.parametrize(
+    ("docs_path", "should_fail"),
+    (
+        (
+            "data/docs/correct_all_description_exist.md",
+            False,  # expected to pass link validation
+        ),
+        (
+            "data/docs/invalid_links.md",
+            True,  # expected to fail link validation
+        ),
+        (
+            "data/docs/correct.md",
+            False,  # expected to pass link validation
+        ),
+    )
+)
+def test_docs_urls(docs_path, should_fail):
+    """Run the link-validation check on a fixture doc; an AssertionError is expected only when `should_fail`."""
+    t = _TestConnectorDocumentation()
+    docs_path = Path(__file__).parent / docs_path  # fixture paths are relative to this test module
+    documentation = docs_path.read_text().rstrip()
+
+    if should_fail is True:
+        with pytest.raises(AssertionError):
+            t.test_validate_links(True, documentation)
+    else:
+        t.test_validate_links(True, documentation)
diff --git a/docs/connector-development/testing-connectors/connector-acceptance-tests-reference.md b/docs/connector-development/testing-connectors/connector-acceptance-tests-reference.md
index e66c4cecd927..5c38d69cc276 100644
--- a/docs/connector-development/testing-connectors/connector-acceptance-tests-reference.md
+++ b/docs/connector-development/testing-connectors/connector-acceptance-tests-reference.md
@@ -303,6 +303,16 @@ Some examples of the types of tests covered are verification that streams define
| `allowed_hosts.bypass_reason` | object with `bypass_reason` | None | Defines the `bypass_reason` description about why the `allowedHosts` check for the certified connector should be skipped |
| `suggested_streams.bypass_reason` | object with `bypass_reason` | None | Defines the `bypass_reason` description about why the `suggestedStreams` check for the certified connector should be skipped |
+## Test Connector Documentation
+
+Verifies that the connector's documentation follows our standard template: headings appear in the correct order,
+no required headings are missing, and all required spec fields are mentioned in the Prerequisites section.
+
+| Input | Type | Default | Note |
+|:------------------|:-------|:----------------------|:-------------------------------------------------------------------|
+| `config_path` | string | `secrets/config.json` | Path to a JSON object representing a valid connector configuration |
+| `timeout_seconds` | int | 20\*60 | Test execution timeout in seconds |
+
## Strictness level
To enforce maximal coverage of acceptances tests we expose a `test_strictness_level` field at the root of the `acceptance-test-config.yml` configuration.