diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index a383b443942db..b983d9d5f3e7c 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -361,7 +361,7 @@ - name: GitHub sourceDefinitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e dockerRepository: airbyte/source-github - dockerImageTag: 0.3.6 + dockerImageTag: 0.3.7 documentationUrl: https://docs.airbyte.com/integrations/sources/github icon: github.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 758fdad057fe5..8194fd7eb1007 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -3520,7 +3520,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-github:0.3.6" +- dockerImage: "airbyte/source-github:0.3.7" spec: documentationUrl: "https://docs.airbyte.com/integrations/sources/github" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-github/Dockerfile b/airbyte-integrations/connectors/source-github/Dockerfile index f1ecd0d3b0133..3a625159c8485 100644 --- a/airbyte-integrations/connectors/source-github/Dockerfile +++ b/airbyte-integrations/connectors/source-github/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.3.6 +LABEL io.airbyte.version=0.3.7 LABEL io.airbyte.name=airbyte/source-github diff --git a/airbyte-integrations/connectors/source-github/setup.py b/airbyte-integrations/connectors/source-github/setup.py index 52380693ac320..f51356bc392d1 100644 --- a/airbyte-integrations/connectors/source-github/setup.py +++ b/airbyte-integrations/connectors/source-github/setup.py @@ -5,7 +5,7 @@ from setuptools import find_packages, setup -MAIN_REQUIREMENTS = ["airbyte-cdk~=0.1.33", "vcrpy==4.1.1", "pendulum~=2.1.2", "sgqlc"] +MAIN_REQUIREMENTS = ["airbyte-cdk~=0.2.0", "pendulum~=2.1.2", "sgqlc"] TEST_REQUIREMENTS = ["pytest~=6.1", "source-acceptance-test", "responses~=0.19.0"] diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index 15d5c4c54666a..4ad809cf8f42f 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -75,14 +75,26 @@ def should_retry(self, response: requests.Response) -> bool: (response.headers.get("X-RateLimit-Resource") == "graphql" and self.check_graphql_rate_limited(response.json())) # Rate limit HTTP headers # https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limit-http-headers - or response.headers.get("X-RateLimit-Remaining") == "0" + or (response.status_code != 200 and response.headers.get("X-RateLimit-Remaining") == "0") # Secondary rate limits # https://docs.github.com/en/rest/overview/resources-in-the-rest-api#secondary-rate-limits or "Retry-After" in response.headers ) if retry_flag: + headers = [ + "X-RateLimit-Resource", + "X-RateLimit-Remaining", + "X-RateLimit-Reset", + "X-RateLimit-Limit", + "X-RateLimit-Used", + "Retry-After", + ] + headers = ", ".join([f"{h}: {response.headers[h]}" for h in headers if h in response.headers]) + if headers: + headers = f"HTTP headers: {headers}," + self.logger.info( - f"Rate limit handling for stream `{self.name}` for the response with {response.status_code} status code with message: {response.text}" + f"Rate limit handling for stream `{self.name}` for the response with {response.status_code} status code, {headers} with message: {response.text}" ) return retry_flag diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py index 8e879a4c4bd2d..699ec27047d9d 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py @@ -95,7 +95,7 @@ def test_backoff_time(time_mock, http_status, response_headers, expected_backoff ("http_status", "response_headers", "text"), [ (HTTPStatus.OK, {"X-RateLimit-Resource": "graphql"}, '{"errors": [{"type": "RATE_LIMITED"}]}'), - (HTTPStatus.OK, {"X-RateLimit-Remaining": "0"}, ""), + (HTTPStatus.FORBIDDEN, {"X-RateLimit-Remaining": "0"}, ""), (HTTPStatus.FORBIDDEN, {"Retry-After": "0"}, ""), (HTTPStatus.FORBIDDEN, {"Retry-After": "60"}, ""), (HTTPStatus.INTERNAL_SERVER_ERROR, {}, ""), @@ -495,7 +495,8 @@ def test_stream_project_columns(): ProjectsResponsesAPI.register(data) - stream = ProjectColumns(Projects(**repository_args_with_start_date), **repository_args_with_start_date) + projects_stream = Projects(**repository_args_with_start_date) + stream = ProjectColumns(projects_stream, **repository_args_with_start_date) stream_state = {} @@ -537,6 +538,8 @@ def test_stream_project_columns(): ProjectsResponsesAPI.register(data) + projects_stream._session.cache.clear() + stream._session.cache.clear() records = read_incremental(stream, stream_state=stream_state) assert records == [ {"id": 24, "name": "column_24", "project_id": 2, "repository": "organization/repository", "updated_at": "2022-04-01T10:00:00Z"}, @@ -607,6 +610,9 @@ def test_stream_project_cards(): ProjectsResponsesAPI.register(data) stream_state = {} + + projects_stream._session.cache.clear() + project_columns_stream._session.cache.clear() records = read_incremental(stream, stream_state=stream_state) assert records == [ @@ -887,7 +893,9 @@ def test_stream_team_members_full_refresh(): responses.add("GET", "https://api.github.com/orgs/org1/teams/team2/members", json=[{"login": "login2"}]) responses.add("GET", "https://api.github.com/orgs/org1/teams/team2/memberships/login2", json={"username": "login2"}) - stream = TeamMembers(parent=Teams(**organization_args), **repository_args) + teams_stream = Teams(**organization_args) + stream = TeamMembers(parent=teams_stream, **repository_args) + teams_stream._session.cache.clear() records = list(read_full_refresh(stream)) assert records == [ @@ -977,6 +985,7 @@ def test_stream_commit_comment_reactions_incremental_read(): json=[{"id": 154935433, "created_at": "2022-02-01T17:00:00Z"}], ) + stream._parent_stream._session.cache.clear() records = read_incremental(stream, stream_state) assert records == [ diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index 8f5027b957192..14342ef5ae2a2 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -147,6 +147,7 @@ The GitHub connector should not run into GitHub API limitations under normal usa | Version | Date | Pull Request | Subject | | :------ | :--------- | :---------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| 0.3.7 | 2022-10-20 | [18213](https://github.com/airbytehq/airbyte/pull/18213) | Skip retry on HTTP 200 | | 0.3.6 | 2022-10-11 | [17852](https://github.com/airbytehq/airbyte/pull/17852) | Use default behaviour, retry on 429 and all 5XX errors | | 0.3.5 | 2022-10-07 | [17715](https://github.com/airbytehq/airbyte/pull/17715) | Improve 502 handling for `comments` stream | | 0.3.4 | 2022-10-04 | [17555](https://github.com/airbytehq/airbyte/pull/17555) | Skip repository if got HTTP 500 for WorkflowRuns stream |