From e5392eebcf6beb80ac4185a890b7361a2903ca95 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants <36314070+artem1205@users.noreply.github.com> Date: Wed, 4 Jan 2023 00:32:03 +0100 Subject: [PATCH] Source Sentry: add incremental sync (#20709) * Source Sentry: add incremental sync * Source Sentry: fix test config * Source Sentry: bump version; update docs * auto-bump connector version Co-authored-by: Octavia Squidington III --- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 7 ++- .../connectors/source-sentry/Dockerfile | 2 +- .../source-sentry/acceptance-test-config.yml | 39 +++++++++++------ .../integration_tests/abnormal_state.json | 13 ++++++ .../integration_tests/configured_catalog.json | 6 +-- .../source_sentry/schemas/events.json | 3 +- .../source-sentry/source_sentry/streams.py | 43 +++++++++++++++++-- docs/integrations/sources/sentry.md | 3 +- 9 files changed, 95 insertions(+), 23 deletions(-) create mode 100644 airbyte-integrations/connectors/source-sentry/integration_tests/abnormal_state.json diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 7a56685403a4..18bff87d003d 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -1910,7 +1910,7 @@ - sourceDefinitionId: cdaf146a-9b75-49fd-9dd2-9d64a0bb4781 name: Sentry dockerRepository: airbyte/source-sentry - dockerImageTag: 0.1.7 + dockerImageTag: 0.1.8 documentationUrl: https://docs.airbyte.com/integrations/sources/sentry icon: sentry.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index f8efa0921ef6..d34421611d7e 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -16476,7 +16476,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-sentry:0.1.7" +- dockerImage: "airbyte/source-sentry:0.1.8" spec: documentationUrl: "https://docs.airbyte.com/integrations/sources/sentry" connectionSpecification: @@ -16510,6 +16510,11 @@ type: "string" title: "Project" description: "The name (slug) of the Project you want to sync." + discover_fields: + type: "array" + item: "string" + title: "Discover Event Fields" + description: "Fields to retrieve when fetching discover events" supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] diff --git a/airbyte-integrations/connectors/source-sentry/Dockerfile b/airbyte-integrations/connectors/source-sentry/Dockerfile index a21817b7bf66..c5f4badb669b 100644 --- a/airbyte-integrations/connectors/source-sentry/Dockerfile +++ b/airbyte-integrations/connectors/source-sentry/Dockerfile @@ -34,5 +34,5 @@ COPY source_sentry ./source_sentry ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.7 +LABEL io.airbyte.version=0.1.8 LABEL io.airbyte.name=airbyte/source-sentry diff --git a/airbyte-integrations/connectors/source-sentry/acceptance-test-config.yml b/airbyte-integrations/connectors/source-sentry/acceptance-test-config.yml index 6e27bef75cf4..9f63e404fa3c 100644 --- a/airbyte-integrations/connectors/source-sentry/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-sentry/acceptance-test-config.yml @@ -1,18 +1,33 @@ connector_image: airbyte/source-sentry:dev -tests: +acceptance_tests: spec: - - spec_path: "source_sentry/spec.json" + tests: + - spec_path: "source_sentry/spec.json" connection: - - config_path: "secrets/config.json" - status: "succeed" - - config_path: "integration_tests/invalid_config.json" - status: "failed" + tests: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" discovery: - - config_path: "secrets/config.json" + tests: + - config_path: "secrets/config.json" basic_read: - - config_path: "secrets/config.json" - configured_catalog_path: "integration_tests/configured_catalog.json" - empty_streams: ["issues", "events"] + tests: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: + - name: issues + bypass_reason: "unable to populate" + - name: events + bypass_reason: "unable to populate" + incremental: + tests: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + future_state: + future_state_path: "integration_tests/abnormal_state.json" full_refresh: - - config_path: "secrets/config.json" - configured_catalog_path: "integration_tests/configured_catalog.json" + tests: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-sentry/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-sentry/integration_tests/abnormal_state.json new file mode 100644 index 000000000000..2ceeea6afda2 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/integration_tests/abnormal_state.json @@ -0,0 +1,13 @@ +[ + { + "type": "STREAM", + "stream": { + "stream_state": { + "dateCreated": "2100-01-01T00:00:00.0Z" + }, + "stream_descriptor": { + "name": "projects" + } + } + } +] diff --git a/airbyte-integrations/connectors/source-sentry/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-sentry/integration_tests/configured_catalog.json index ed38985229c5..d03dba4404d1 100644 --- a/airbyte-integrations/connectors/source-sentry/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-sentry/integration_tests/configured_catalog.json @@ -28,12 +28,12 @@ } }, { - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite", + "sync_mode": "incremental", + "destination_sync_mode": "append", "stream": { "name": "projects", "json_schema": {}, - "supported_sync_modes": ["full_refresh"] + "supported_sync_modes": ["full_refresh", "incremental"] } } ] diff --git a/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/events.json b/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/events.json index 59345c95f7d4..c53ca5f62bf9 100644 --- a/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/events.json +++ b/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/events.json @@ -20,7 +20,8 @@ } }, "dateCreated": { - "type": "string" + "type": "string", + "format": "date-time" }, "user": { "type": ["null", "object"], diff --git a/airbyte-integrations/connectors/source-sentry/source_sentry/streams.py b/airbyte-integrations/connectors/source-sentry/source_sentry/streams.py index 4e0cb131cae5..7aea30c3144a 100644 --- a/airbyte-integrations/connectors/source-sentry/source_sentry/streams.py +++ b/airbyte-integrations/connectors/source-sentry/source_sentry/streams.py @@ -6,7 +6,9 @@ from abc import ABC from typing import Any, Iterable, Mapping, MutableMapping, Optional +import pendulum import requests +from airbyte_cdk.sources.streams import IncrementalMixin from airbyte_cdk.sources.streams.http import HttpStream @@ -62,11 +64,40 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp yield from response.json() -class Events(SentryStreamPagination): +class SentryIncremental(SentryStreamPagination, IncrementalMixin): + def filter_by_state(self, stream_state: Mapping[str, Any] = None, record: Mapping[str, Any] = None) -> Iterable: + """ + Endpoint does not provide query filtering params, but they provide us + cursor field in most cases, so we used that as incremental filtering + during the parsing. + """ + start_date = "1900-01-01T00:00:00.0Z" + if pendulum.parse(record[self.cursor_field]) >= pendulum.parse((stream_state or {}).get(self.cursor_field, start_date)): + yield record + + def parse_response(self, response: requests.Response, stream_state: Mapping[str, Any], **kwargs) -> Iterable[MutableMapping]: + json_response = response.json() or [] + + for record in json_response: + yield from self.filter_by_state(stream_state=stream_state, record=record) + + @property + def state(self) -> Mapping[str, Any]: + return {self.cursor_field: str(self._cursor_value)} + + @state.setter + def state(self, value: Mapping[str, Any]): + self._cursor_value = value[self.cursor_field] + + +class Events(SentryIncremental): """ Docs: https://docs.sentry.io/api/events/list-a-projects-events/ """ + primary_key = "id" + cursor_field = "dateCreated" + def __init__(self, organization: str, project: str, **kwargs): super().__init__(**kwargs) self._organization = organization @@ -92,11 +123,14 @@ def request_params( return params -class Issues(SentryStreamPagination): +class Issues(SentryIncremental): """ Docs: https://docs.sentry.io/api/events/list-a-projects-issues/ """ + primary_key = "id" + cursor_field = "lastSeen" + def __init__(self, organization: str, project: str, **kwargs): super().__init__(**kwargs) self._organization = organization @@ -122,11 +156,14 @@ def request_params( return params -class Projects(SentryStreamPagination): +class Projects(SentryIncremental): """ Docs: https://docs.sentry.io/api/projects/list-your-projects/ """ + primary_key = "id" + cursor_field = "dateCreated" + def path( self, stream_state: Optional[Mapping[str, Any]] = None, diff --git a/docs/integrations/sources/sentry.md b/docs/integrations/sources/sentry.md index 7a51ce34b1ed..b6e59e96121b 100644 --- a/docs/integrations/sources/sentry.md +++ b/docs/integrations/sources/sentry.md @@ -45,7 +45,8 @@ The Sentry source connector supports the following [sync modes](https://docs.air | Version | Date | Pull Request | Subject | |:--------|:-----------|:---------------------------------------------------------|:--------------------------------------------------| -| 0.1.7 | 2022-09-30 | [17466](https://github.com/airbytehq/airbyte/pull/17466) | Migrate to per-stream states | +| 0.1.8 | 2022-12-20 | [20709](https://github.com/airbytehq/airbyte/pull/20709) | Add incremental sync | +| 0.1.7 | 2022-09-30 | [17466](https://github.com/airbytehq/airbyte/pull/17466) | Migrate to per-stream states | | 0.1.6 | 2022-08-29 | [16112](https://github.com/airbytehq/airbyte/pull/16112) | Revert back to the Python CDK | | 0.1.5 | 2022-08-24 | [15911](https://github.com/airbytehq/airbyte/pull/15911) | Bugfix to allowing reading schemas at runtime | | 0.1.4 | 2022-08-19 | [15800](https://github.com/airbytehq/airbyte/pull/15800) | Bugfix to allow reading sentry.yaml at runtime |