diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index bb346764f1b7..7a0ebc55a884 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -252,7 +252,7 @@ - name: GitHub sourceDefinitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e dockerRepository: airbyte/source-github - dockerImageTag: 0.2.25 + dockerImageTag: 0.2.26 documentationUrl: https://docs.airbyte.io/integrations/sources/github icon: github.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 54d294150338..1f2c38fc05d6 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2412,7 +2412,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-github:0.2.25" +- dockerImage: "airbyte/source-github:0.2.26" spec: documentationUrl: "https://docs.airbyte.com/integrations/sources/github" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-github/Dockerfile b/airbyte-integrations/connectors/source-github/Dockerfile index ba355b9c16ab..79647edc9a7e 100644 --- a/airbyte-integrations/connectors/source-github/Dockerfile +++ b/airbyte-integrations/connectors/source-github/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.25 +LABEL io.airbyte.version=0.2.26 LABEL io.airbyte.name=airbyte/source-github diff --git a/airbyte-integrations/connectors/source-github/acceptance-test-config.yml b/airbyte-integrations/connectors/source-github/acceptance-test-config.yml index 1bafcddb624b..f24b80955cb1 100644 --- a/airbyte-integrations/connectors/source-github/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-github/acceptance-test-config.yml @@ -37,7 +37,7 @@ tests: pull_requests: ["airbytehq/integration-test", "updated_at"] releases: ["airbytehq/integration-test", "created_at"] review_comments: ["airbytehq/integration-test", "updated_at"] - reviews: ["airbytehq/integration-test", "submitted_at"] + reviews: ["airbytehq/integration-test", "pull_request_updated_at"] stargazers: ["airbytehq/integration-test", "starred_at"] full_refresh: - config_path: "secrets/config.json" diff --git a/airbyte-integrations/connectors/source-github/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-github/integration_tests/abnormal_state.json index 88c0d74e0fbd..3472a4af8d20 100644 --- a/airbyte-integrations/connectors/source-github/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-github/integration_tests/abnormal_state.json @@ -85,7 +85,7 @@ }, "reviews": { "airbytehq/integration-test": { - "submitted_at": "2121-06-29T02:04:57Z" + "pull_request_updated_at": "2121-06-29T02:04:57Z" } }, "stargazers": { diff --git a/airbyte-integrations/connectors/source-github/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-github/integration_tests/configured_catalog.json index 8e99d4fb372c..4b42ed1d5818 100644 --- a/airbyte-integrations/connectors/source-github/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-github/integration_tests/configured_catalog.json @@ -311,12 +311,12 @@ "json_schema": {}, "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": ["submitted_at"], + "default_cursor_field": ["pull_request_updated_at"], "source_defined_primary_key": [["id"]] }, "sync_mode": "incremental", "destination_sync_mode": "append", - "cursor_field": ["submitted_at"] + "cursor_field": ["pull_request_updated_at"] }, { "stream": { diff --git a/airbyte-integrations/connectors/source-github/integration_tests/sample_state.json b/airbyte-integrations/connectors/source-github/integration_tests/sample_state.json index 86698788537c..b6d55e18331c 100644 --- a/airbyte-integrations/connectors/source-github/integration_tests/sample_state.json +++ b/airbyte-integrations/connectors/source-github/integration_tests/sample_state.json @@ -61,7 +61,7 @@ }, "reviews": { "airbytehq/integration-test": { - "submitted_at": "2021-08-30T12:01:15Z" + "pull_request_updated_at": "2021-08-30T12:01:15Z" } } } diff --git a/airbyte-integrations/connectors/source-github/source_github/schemas/reviews.json b/airbyte-integrations/connectors/source-github/source_github/schemas/reviews.json index d68e1ccfafe3..d6e480e9a1c3 100644 --- a/airbyte-integrations/connectors/source-github/source_github/schemas/reviews.json +++ b/airbyte-integrations/connectors/source-github/source_github/schemas/reviews.json @@ -51,6 +51,10 @@ "type": ["null", "string"], "format": "date-time" }, + "pull_request_updated_at": { + "type": "string", + "format": "date-time" + }, "commit_id": { "type": ["null", "string"] }, diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index fcba64cc48af..79368893d407 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -661,6 +661,7 @@ def stream_slices( parent_stream_slices = super().stream_slices(sync_mode=sync_mode, cursor_field=cursor_field, stream_state=parent_state) for parent_stream_slice in parent_stream_slices: yield { + "pull_request_updated_at": parent_stream_slice["parent"]["updated_at"], "pull_request_number": parent_stream_slice["parent"]["number"], "repository": parent_stream_slice["parent"]["repository"], } @@ -708,7 +709,7 @@ class Reviews(PullRequestSubstream): API docs: https://docs.github.com/en/rest/reference/pulls#list-reviews-for-a-pull-request """ - cursor_field = "submitted_at" + cursor_field = "pull_request_updated_at" def path( self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None @@ -723,6 +724,11 @@ def stream_slices(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Ite parent_state[repository][self.parent.cursor_field] = parent_state[repository][self.cursor_field] yield from super().stream_slices(stream_state=parent_state, **kwargs) + def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]: + record = super().transform(record=record, stream_slice=stream_slice) + record[self.cursor_field] = stream_slice[self.cursor_field] + return record + class PullRequestCommits(GithubStream): """ diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py index 29d21f31e2ce..8656a49368ca 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py @@ -29,13 +29,14 @@ PullRequests, Releases, Repositories, + Reviews, Stargazers, Tags, Teams, Users, ) -from .utils import ProjectsResponsesAPI, read_full_refresh, read_incremental +from .utils import ProjectsResponsesAPI, read_full_refresh, read_incremental, urlbase DEFAULT_BACKOFF_DELAYS = [5, 10, 20, 40, 80] @@ -659,3 +660,87 @@ def get_records(cursor_field): stream = Stargazers(**repository_args_with_start_date) records = read_full_refresh(stream) assert records == [{"repository": "organization/repository", "starred_at": "2022-02-02T00:00:00Z", "user": {"id": 2}, "user_id": 2}] + + +@responses.activate +def test_stream_reviews_incremental_read(): + + url_pulls = "https://api.github.com/repos/organization/repository/pulls" + + repository_args_with_start_date = { + "start_date": "2000-01-01T00:00:00Z", + "page_size_for_large_streams": 30, + "repositories": ["organization/repository"], + } + stream = Reviews(parent=PullRequests(**repository_args_with_start_date), **repository_args_with_start_date) + + responses.add( + "GET", + url_pulls, + json=[ + {"updated_at": "2022-01-01T00:00:00Z", "number": 1}, + {"updated_at": "2022-01-02T00:00:00Z", "number": 2}, + ], + ) + + responses.add( + "GET", + "https://api.github.com/repos/organization/repository/pulls/1/reviews", + json=[{"id": 1000, "body": "commit1"}, {"id": 1001, "body": "commit1"}], + ) + + responses.add( + "GET", + "https://api.github.com/repos/organization/repository/pulls/2/reviews", + json=[{"id": 1002, "body": "commit1"}], + ) + + stream_state = {} + records = read_incremental(stream, stream_state) + + assert records == [ + {"body": "commit1", "id": 1000, "pull_request_updated_at": "2022-01-01T00:00:00Z", "repository": "organization/repository"}, + {"body": "commit1", "id": 1001, "pull_request_updated_at": "2022-01-01T00:00:00Z", "repository": "organization/repository"}, + {"body": "commit1", "id": 1002, "pull_request_updated_at": "2022-01-02T00:00:00Z", "repository": "organization/repository"}, + ] + + assert stream_state == {"organization/repository": {"pull_request_updated_at": "2022-01-02T00:00:00Z"}} + + responses.add( + "GET", + url_pulls, + json=[ + {"updated_at": "2022-01-03T00:00:00Z", "number": 1}, + {"updated_at": "2022-01-02T00:00:00Z", "number": 2}, + {"updated_at": "2022-01-04T00:00:00Z", "number": 3}, + ], + ) + + responses.add( + "GET", + "https://api.github.com/repos/organization/repository/pulls/1/reviews", + json=[{"id": 1000, "body": "commit1"}, {"id": 1001, "body": "commit2"}], + ) + + responses.add( + "GET", + "https://api.github.com/repos/organization/repository/pulls/3/reviews", + json=[{"id": 1003, "body": "commit1"}], + ) + + records = read_incremental(stream, stream_state) + + assert records == [ + {"body": "commit1", "id": 1000, "pull_request_updated_at": "2022-01-03T00:00:00Z", "repository": "organization/repository"}, + {"body": "commit2", "id": 1001, "pull_request_updated_at": "2022-01-03T00:00:00Z", "repository": "organization/repository"}, + {"body": "commit1", "id": 1003, "pull_request_updated_at": "2022-01-04T00:00:00Z", "repository": "organization/repository"}, + ] + + assert stream_state == {"organization/repository": {"pull_request_updated_at": "2022-01-04T00:00:00Z"}} + + assert len(responses.calls) == 6 + assert urlbase(responses.calls[0].request.url) == url_pulls + # make sure parent stream PullRequests used ascending sorting for both HTTP requests + assert responses.calls[0].request.params["direction"] == "asc" + assert urlbase(responses.calls[3].request.url) == url_pulls + assert responses.calls[3].request.params["direction"] == "asc" diff --git a/airbyte-integrations/connectors/source-github/unit_tests/utils.py b/airbyte-integrations/connectors/source-github/unit_tests/utils.py index b318429a30cb..6f844e8f33dd 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/utils.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/utils.py @@ -3,6 +3,7 @@ # from typing import Any, MutableMapping +from urllib.parse import urlparse import responses from airbyte_cdk.models import SyncMode @@ -28,6 +29,10 @@ def read_incremental(stream_instance: Stream, stream_state: MutableMapping[str, return res +def urlbase(url): + return urlparse(url)._replace(params="", query="", fragment="").geturl() + + class ProjectsResponsesAPI: """ Fake Responses API for github projects, columns, cards diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index deb17afa22da..96a302a60717 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -111,6 +111,7 @@ Your token should have at least the `repo` scope. Depending on which streams you | Version | Date | Pull Request | Subject | |:--------|:-----------| :--- |:-------------------------------------------------------------------------------------------------------------| +| 0.2.26 | 2022-03-31 | [11623](https://github.com/airbytehq/airbyte/pull/11623) | Re-factored incremental sync for `Reviews` stream | | 0.2.25 | 2022-03-31 | [11567](https://github.com/airbytehq/airbyte/pull/11567) | Improve code for better error handling | | 0.2.24 | 2022-03-30 | [9251](https://github.com/airbytehq/airbyte/pull/9251) | Add Streams Workflow and WorkflowRuns | | 0.2.23 | 2022-03-17 | [11212](https://github.com/airbytehq/airbyte/pull/11212) | Improve documentation and spec for Beta |