Skip to content

Commit

Permalink
Source Github: improve 502 handling for comments stream (#17715)
Browse files Browse the repository at this point in the history
Signed-off-by: Sergey Chvalyuk <grubberr@gmail.com>
  • Loading branch information
grubberr authored Oct 7, 2022
1 parent 6ea207d commit f437453
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@
- name: GitHub
sourceDefinitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e
dockerRepository: airbyte/source-github
dockerImageTag: 0.3.4
dockerImageTag: 0.3.5
documentationUrl: https://docs.airbyte.io/integrations/sources/github
icon: github.svg
sourceType: api
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3503,7 +3503,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-github:0.3.4"
- dockerImage: "airbyte/source-github:0.3.5"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/github"
connectionSpecification:
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-github/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ RUN pip install .
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.3.4
LABEL io.airbyte.version=0.3.5
LABEL io.airbyte.name=airbyte/source-github
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import requests
from airbyte_cdk.models import SyncMode
from airbyte_cdk.sources.streams.http import HttpStream
from airbyte_cdk.sources.streams.http.exceptions import DefaultBackoffException
from requests.exceptions import HTTPError

from .graphql import CursorStorage, QueryReactions, get_query_pull_requests, get_query_reviews
Expand Down Expand Up @@ -95,7 +96,7 @@ def backoff_time(self, response: requests.Response) -> Union[int, float]:
# `X-RateLimit-Reset` header which contains time when this hour will be finished and limits will be reset so
# we again could have 5000 per another hour.

if response.status_code == requests.codes.SERVER_ERROR:
if response.status_code in (requests.codes.SERVER_ERROR, requests.codes.BAD_GATEWAY):
return None

retry_after = int(response.headers.get("Retry-After", 0))
Expand All @@ -107,6 +108,16 @@ def backoff_time(self, response: requests.Response) -> Union[int, float]:

return max(backoff_time, 60) # This is a guarantee that no negative value will be returned.

def get_error_display_message(self, exception: BaseException) -> Optional[str]:
    """Return a user-facing hint for 502 failures on large streams.

    When `exception` is a `DefaultBackoffException` whose response status is
    502 (Bad Gateway), and this stream is flagged as `large_stream` with a
    `page_size` greater than 1, the returned message suggests lowering the
    page size. In every other case the parent implementation decides the
    message.

    :param exception: the exception to build a display message for.
    :return: the page-size hint, or whatever the parent class returns.
    """
    # The status check is evaluated only after the isinstance check passes,
    # so `exception.response` is never touched for unrelated exception types.
    is_bad_gateway = (
        isinstance(exception, DefaultBackoffException)
        and exception.response.status_code == requests.codes.BAD_GATEWAY
    )

    # Guard clause: anything that is not "502 on a large stream with a
    # reducible page size" is delegated to the parent class.
    if not (is_bad_gateway and self.large_stream and self.page_size > 1):
        return super().get_error_display_message(exception)

    return (
        f'Please try to decrease the "Page size for large streams" below {self.page_size}. '
        f'The stream "{self.name}" is a large stream, such streams can fail with 502 for high "page_size" values.'
    )

def read_records(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping[str, Any]]:
# get out the stream_slice parts for later use.
organisation = stream_slice.get("organization", "")
Expand Down Expand Up @@ -597,6 +608,7 @@ class Comments(IncrementalMixin, GithubStream):

use_cache = True
large_stream = True
max_retries = 7

def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
return f"repos/{stream_slice['repository']}/issues/comments"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ def test_internal_server_error_retry(time_mock):
@pytest.mark.parametrize(
("http_status", "response_headers", "expected_backoff_time"),
[
(HTTPStatus.BAD_GATEWAY, {}, 60),
(HTTPStatus.BAD_GATEWAY, {}, None),
(HTTPStatus.INTERNAL_SERVER_ERROR, {}, None),
(HTTPStatus.FORBIDDEN, {"Retry-After": 120}, 120),
(HTTPStatus.FORBIDDEN, {"X-RateLimit-Reset": 1655804724}, 300.0),
],
Expand Down
1 change: 1 addition & 0 deletions docs/integrations/sources/github.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ The GitHub connector should not run into GitHub API limitations under normal usa

| Version | Date | Pull Request | Subject |
| :------ | :--------- | :---------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| 0.3.5 | 2022-10-07 | [17715](https://github.com/airbytehq/airbyte/pull/17715) | Improve 502 handling for `comments` stream |
| 0.3.4 | 2022-10-04 | [17555](https://github.com/airbytehq/airbyte/pull/17555) | Skip repository if got HTTP 500 for WorkflowRuns stream |
| 0.3.3 | 2022-09-28 | [17287](https://github.com/airbytehq/airbyte/pull/17287) | Fix problem with "null" `cursor_field` for WorkflowJobs stream |
| 0.3.2 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state. |
Expand Down

0 comments on commit f437453

Please sign in to comment.