From c53ff7496e29a4661fdad56712edd48d3c81d4c8 Mon Sep 17 00:00:00 2001 From: ykurochkin Date: Fri, 14 Jan 2022 00:36:16 +0200 Subject: [PATCH 1/5] Source Github: Remove optional parameter Accept for reaction's streams to fix error with 502 HTTP status code --- airbyte-integrations/connectors/source-github/Dockerfile | 2 +- .../connectors/source-github/source_github/streams.py | 3 --- docs/integrations/sources/github.md | 1 + 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/airbyte-integrations/connectors/source-github/Dockerfile b/airbyte-integrations/connectors/source-github/Dockerfile index 3a7e115d6489..3508845d3b6a 100644 --- a/airbyte-integrations/connectors/source-github/Dockerfile +++ b/airbyte-integrations/connectors/source-github/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.10 +LABEL io.airbyte.version=0.2.11 LABEL io.airbyte.name=airbyte/source-github diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index 62c65a695688..20435cb232e4 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -765,9 +765,6 @@ def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]: for parent_record in self._parent_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice): yield {self.parent_key: parent_record[self.parent_key], "repository": stream_slice["repository"]} - def request_headers(self, **kwargs) -> Mapping[str, Any]: - return {"Accept": "application/vnd.github.squirrel-girl-preview+json"} - class CommitCommentReactions(ReactionStream): """ diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index 
7e6b0972a08e..286c1370cd35 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -92,6 +92,7 @@ Your token should have at least the `repo` scope. Depending on which streams you | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.2.11 | 2021-01-?? | [????](https://github.com/airbytehq/airbyte/pull/????) | Remove optional parameter `Accept` for reaction`s streams to fix error with 502 HTTP status code in response | | 0.2.10 | 2021-01-03 | [7250](https://github.com/airbytehq/airbyte/pull/7250) | Use CDK caching and convert PR-related streams to incremental | | 0.2.9 | 2021-12-29 | [9179](https://github.com/airbytehq/airbyte/pull/9179) | Use default retry delays on server error responses | | 0.2.8 | 2021-12-07 | [8524](https://github.com/airbytehq/airbyte/pull/8524) | Update connector fields title/description | From 2cf4b26583e5559813175c981f767cab45f40cf6 Mon Sep 17 00:00:00 2001 From: ykurochkin Date: Mon, 17 Jan 2022 10:07:34 +0200 Subject: [PATCH 2/5] add logger for rate limit handler --- .../source-github/source_github/streams.py | 9 +++++++-- .../source-github/unit_tests/test_stream.py | 14 ++++++++++++-- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index 20435cb232e4..62ebfae14900 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -52,10 +52,15 @@ def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, def should_retry(self, response: requests.Response) -> bool: # We don't call `super()` here because we have custom error handling and GitHub API sometimes returns strange # errors. So in `read_records()` we have custom error handling which don't require to call `super()` here. 
- return response.headers.get("X-RateLimit-Remaining") == "0" or response.status_code in ( + retry_flag = response.headers.get("X-RateLimit-Remaining") == "0" or response.status_code in ( requests.codes.SERVER_ERROR, requests.codes.BAD_GATEWAY, ) + if retry_flag: + self.logger.info( + f"Rate limit handling for the response with {response.status_code} status code with message: {response.json()}" + ) + return retry_flag def backoff_time(self, response: requests.Response) -> Union[int, float]: # This method is called if we run into the rate limit. GitHub limits requests to 5000 per hour and provides @@ -404,7 +409,7 @@ def read_records(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Iter yield from super().read_records(stream_state=stream_state, **kwargs) def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: - return f"repos/{stream_slice['repository']}/pulls" + return f"repos/{stream_slice['repository']}/pullsasd" def transform(self, record: MutableMapping[str, Any], repository: str = None, **kwargs) -> MutableMapping[str, Any]: record = super().transform(record=record, repository=repository) diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py index 1f06233fd45a..9bc04e8db272 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py @@ -20,7 +20,12 @@ def test_bad_gateway_retry(time_mock): stream = PullRequestCommentReactions(**args) stream_slice = {"repository": "test_repo", "id": "id"} - responses.add("GET", "https://api.github.com/repos/test_repo/pulls/comments/id/reactions", status=HTTPStatus.BAD_GATEWAY) + responses.add( + "GET", + "https://api.github.com/repos/test_repo/pulls/comments/id/reactions", + status=HTTPStatus.BAD_GATEWAY, + json={"message": "Bad request"}, + ) with pytest.raises(BaseBackoffException): 
list(stream.read_records(sync_mode="full_refresh", stream_slice=stream_slice)) @@ -28,7 +33,12 @@ def test_bad_gateway_retry(time_mock): assert sleep_delays == DEFAULT_BACKOFF_DELAYS time_mock.reset_mock() - responses.add("GET", "https://api.github.com/repos/test_repo/pulls/comments/id/reactions", status=HTTPStatus.INTERNAL_SERVER_ERROR) + responses.add( + "GET", + "https://api.github.com/repos/test_repo/pulls/comments/id/reactions", + status=HTTPStatus.INTERNAL_SERVER_ERROR, + json={"message": "Server Error"}, + ) with pytest.raises(BaseBackoffException): list(stream.read_records(sync_mode="full_refresh", stream_slice=stream_slice)) From a115b0128428858ae80fb9a7afab80045b03036e Mon Sep 17 00:00:00 2001 From: ykurochkin Date: Mon, 17 Jan 2022 14:21:24 +0200 Subject: [PATCH 3/5] fix mistake --- .../connectors/source-github/source_github/streams.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index 62ebfae14900..a9499124ac30 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -409,7 +409,7 @@ def read_records(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Iter yield from super().read_records(stream_state=stream_state, **kwargs) def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: - return f"repos/{stream_slice['repository']}/pullsasd" + return f"repos/{stream_slice['repository']}/pulls" def transform(self, record: MutableMapping[str, Any], repository: str = None, **kwargs) -> MutableMapping[str, Any]: record = super().transform(record=record, repository=repository) From 1202c8007fa3341f09023a24daf40e1e7e13d565 Mon Sep 17 00:00:00 2001 From: ykurochkin Date: Mon, 17 Jan 2022 14:58:50 +0200 Subject: [PATCH 4/5] update changelogs --- 
docs/integrations/sources/github.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index 286c1370cd35..76ec536519b2 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -92,7 +92,7 @@ Your token should have at least the `repo` scope. Depending on which streams you | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | -| 0.2.11 | 2021-01-?? | [????](https://github.com/airbytehq/airbyte/pull/????) | Remove optional parameter `Accept` for reaction`s streams to fix error with 502 HTTP status code in response | +| 0.2.11 | 2022-01-17 | [9492](https://github.com/airbytehq/airbyte/pull/9492) | Remove optional parameter `Accept` for reaction's streams to fix error with 502 HTTP status code in response | | 0.2.10 | 2021-01-03 | [7250](https://github.com/airbytehq/airbyte/pull/7250) | Use CDK caching and convert PR-related streams to incremental | | 0.2.9 | 2021-12-29 | [9179](https://github.com/airbytehq/airbyte/pull/9179) | Use default retry delays on server error responses | | 0.2.8 | 2021-12-07 | [8524](https://github.com/airbytehq/airbyte/pull/8524) | Update connector fields title/description | From 9657a1cf9e14d426bb86fad6de5f612a04fdf96f Mon Sep 17 00:00:00 2001 From: ykurochkin Date: Mon, 17 Jan 2022 15:20:47 +0200 Subject: [PATCH 5/5] update version --- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- airbyte-config/init/src/main/resources/seed/source_specs.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 9347a5dc1119..fe8fb0d20450 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -217,7 +217,7 @@ - name: GitHub
sourceDefinitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e dockerRepository: airbyte/source-github - dockerImageTag: 0.2.10 + dockerImageTag: 0.2.11 documentationUrl: https://docs.airbyte.io/integrations/sources/github icon: github.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 723f59f8fdd6..b1c3d5005fb4 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -1985,7 +1985,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-github:0.2.10" +- dockerImage: "airbyte/source-github:0.2.11" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/github" connectionSpecification: