Skip to content

Commit

Permalink
Source Mailchimp: Handle empty fields in Reports stream (#32543)
Browse files Browse the repository at this point in the history
  • Loading branch information
ChristoGrab authored Nov 17, 2023
1 parent e69e3de commit 4e51b7d
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: b03a9f3e-22a5-11eb-adc1-0242ac120002
dockerImageTag: 0.8.2
dockerImageTag: 0.8.3
dockerRepository: airbyte/source-mailchimp
documentationUrl: https://docs.airbyte.com/integrations/sources/mailchimp
githubIssueLabel: source-mailchimp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@
"description": "The number of unique opens divided by the total number of successful deliveries."
},
"last_open": {
"type": "string",
"type": ["null", "string"],
"format": "date-time",
"title": "Last Open",
"description": "The date and time of the last recorded open in ISO 8601 format."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -274,21 +274,28 @@ class Reports(IncrementalMailChimpStream):
cursor_field = "send_time"
data_field = "reports"

@staticmethod
def remove_empty_datetime_fields(record: Mapping[str, Any]) -> Mapping[str, Any]:
    """
    Remove empty 'clicks.last_click' and 'opens.last_open' values from a record.

    In some cases these fields are returned as an empty string, which causes
    validation errors on the `date-time` format. To avoid this, the fields are
    removed when they are empty.

    The nested dicts are modified in place and the same `record` object is returned.
    A 'clicks' or 'opens' entry that is missing, null, or not a dict is left untouched.

    :param record: a single record, optionally holding 'clicks' and 'opens' dicts
    :return: the same record, with empty datetime fields dropped
    """
    for parent_key, field in (("clicks", "last_click"), ("opens", "last_open")):
        parent = record.get(parent_key)
        # `.get(key, {})` would still hand back None for an explicit null value,
        # so check the type before touching the nested field.
        if isinstance(parent, dict) and not parent.get(field):
            parent.pop(field, None)
    return record

def path(self, **kwargs) -> str:
    """Return the path for this stream: always "reports", whatever keyword arguments are passed."""
    endpoint = "reports"
    return endpoint

def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
    """
    Yield each parsed record exactly once, with empty datetime fields removed.

    Records are produced by the parent class's `parse_response`; each one is passed
    through `remove_empty_datetime_fields`, which drops empty 'clicks.last_click' and
    'opens.last_open' values so they do not fail `date-time` validation.

    :param response: the HTTP response to parse
    :param kwargs: forwarded unchanged to the parent implementation
    :return: an iterable of cleaned records
    """
    # The helper covers both 'last_click' and 'last_open', so no inline cleanup is
    # needed here and each record must be emitted only once.
    for record in super().parse_response(response, **kwargs):
        yield self.remove_empty_datetime_fields(record)


class Segments(MailChimpListSubStream):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import responses
from airbyte_cdk.models import SyncMode
from requests.exceptions import HTTPError
from source_mailchimp.streams import Campaigns, EmailActivity, ListMembers, Lists, Segments
from source_mailchimp.streams import Campaigns, EmailActivity, ListMembers, Lists, Reports, Segments
from utils import read_full_refresh, read_incremental


Expand Down Expand Up @@ -413,3 +413,39 @@ def test_403_error_handling(
# Handle non-403 error
except HTTPError as e:
assert e.response.status_code == status_code

@pytest.mark.parametrize(
    "record, expected_return",
    [
        (
            {"clicks": {"last_click": ""}, "opens": {"last_open": ""}},
            {"clicks": {}, "opens": {}},
        ),
        (
            {"clicks": {"last_click": "2023-01-01T00:00:00.000Z"}, "opens": {"last_open": ""}},
            {"clicks": {"last_click": "2023-01-01T00:00:00.000Z"}, "opens": {}},
        ),
        (
            {"clicks": {"last_click": ""}, "opens": {"last_open": "2023-01-01T00:00:00.000Z"}},
            {"clicks": {}, "opens": {"last_open": "2023-01-01T00:00:00.000Z"}},
        ),
        (
            {"clicks": {"last_click": "2023-01-01T00:00:00.000Z"}, "opens": {"last_open": "2023-01-01T00:00:00.000Z"}},
            {"clicks": {"last_click": "2023-01-01T00:00:00.000Z"}, "opens": {"last_open": "2023-01-01T00:00:00.000Z"}},
        ),
    ],
    ids=[
        "last_click and last_open empty",
        "last_click empty",
        "last_open empty",
        "last_click and last_open not empty",
    ],
)
def test_reports_remove_empty_datetime_fields(auth, record, expected_return):
    """
    Tests that the Reports stream removes the 'clicks.last_click' and 'opens.last_open' fields
    from a record when they are empty strings, and keeps them when they hold a value
    """
    stream = Reports(authenticator=auth)
    # Call once and reuse the result: the method mutates `record` in place, so the
    # failure message should report the value that was actually compared.
    actual = stream.remove_empty_datetime_fields(record)
    assert actual == expected_return, f"Expected: {expected_return}, Actual: {actual}"
1 change: 1 addition & 0 deletions docs/integrations/sources/mailchimp.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ Now that you have set up the Mailchimp source connector, check out the following

| Version | Date | Pull Request | Subject |
|---------|------------|----------------------------------------------------------|----------------------------------------------------------------------------|
| 0.8.3 | 2023-11-15 | [32543](https://github.com/airbytehq/airbyte/pull/32543) | Handle empty datetime fields in Reports stream |
| 0.8.2 | 2023-11-13 | [32466](https://github.com/airbytehq/airbyte/pull/32466) | Improve error handling during connection check |
| 0.8.1 | 2023-11-06 | [32226](https://github.com/airbytehq/airbyte/pull/32226) | Unmute expected records test after data anonymisation |
| 0.8.0 | 2023-11-01 | [32032](https://github.com/airbytehq/airbyte/pull/32032) | Add ListMembers stream |
Expand Down

0 comments on commit 4e51b7d

Please sign in to comment.