From aeb2e63509ec8e3bc5cd4541fed493288522cda1 Mon Sep 17 00:00:00 2001 From: Christo Grabowski <108154848+ChristoGrab@users.noreply.github.com> Date: Mon, 27 Nov 2023 10:16:22 -0500 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Source=20Mailchimp:=20Implement=20S?= =?UTF-8?q?egmentMembers=20stream=20(#32782)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../integration_tests/configured_catalog.json | 14 +++ ...ured_catalog_without_email_activities.json | 14 +++ .../integration_tests/expected_records.jsonl | 2 + .../integration_tests/segments.json | 18 +++ .../integration_tests/state.json | 23 ++++ .../connectors/source-mailchimp/metadata.yaml | 2 +- .../schemas/segment_members.json | 117 ++++++++++++++++++ .../source_mailchimp/source.py | 2 + .../source_mailchimp/streams.py | 74 +++++++++++ .../unit_tests/test_source.py | 2 +- .../unit_tests/test_streams.py | 98 +++++++++++++++ docs/integrations/sources/mailchimp.md | 16 +-- 12 files changed, 372 insertions(+), 10 deletions(-) create mode 100644 airbyte-integrations/connectors/source-mailchimp/integration_tests/segments.json create mode 100644 airbyte-integrations/connectors/source-mailchimp/source_mailchimp/schemas/segment_members.json diff --git a/airbyte-integrations/connectors/source-mailchimp/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-mailchimp/integration_tests/configured_catalog.json index ac0531ed58f3..458ab841ad88 100644 --- a/airbyte-integrations/connectors/source-mailchimp/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-mailchimp/integration_tests/configured_catalog.json @@ -106,6 +106,20 @@ "primary_key": [["id"]], "destination_sync_mode": "append" }, + { + "stream": { + "name": "segment_members", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["last_changed"], + "source_defined_primary_key": 
[["id"]] + }, + "sync_mode": "incremental", + "cursor_field": ["last_changed"], + "primary_key": [["id"]], + "destination_sync_mode": "append" + }, { "stream": { "name": "segments", diff --git a/airbyte-integrations/connectors/source-mailchimp/integration_tests/configured_catalog_without_email_activities.json b/airbyte-integrations/connectors/source-mailchimp/integration_tests/configured_catalog_without_email_activities.json index e5d348c8e507..befee3dcfc63 100644 --- a/airbyte-integrations/connectors/source-mailchimp/integration_tests/configured_catalog_without_email_activities.json +++ b/airbyte-integrations/connectors/source-mailchimp/integration_tests/configured_catalog_without_email_activities.json @@ -92,6 +92,20 @@ "primary_key": [["id"]], "destination_sync_mode": "append" }, + { + "stream": { + "name": "segment_members", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["last_changed"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "incremental", + "cursor_field": ["last_changed"], + "primary_key": [["id"]], + "destination_sync_mode": "append" + }, { "stream": { "name": "segments", diff --git a/airbyte-integrations/connectors/source-mailchimp/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-mailchimp/integration_tests/expected_records.jsonl index 31d306357aae..9353cdc7b744 100644 --- a/airbyte-integrations/connectors/source-mailchimp/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-mailchimp/integration_tests/expected_records.jsonl @@ -5,6 +5,8 @@ {"stream": "list_members", "data": {"id": "458f50b08c829a8ab901d3f8f88df914", "email_address": "integration-test+Thomas@airbyte.io", "unique_email_id": "42d6d67d11", "contact_id": "475a8f7f7b5087d7be924c9b331c8316", "full_name": "Thomas", "web_id": 546044608, "email_type": "html", "status": "unsubscribed", "unsubscribe_reason": "N/A 
(Unsubscribed by admin)", "consents_to_one_to_one_messaging": true, "merge_fields": {"FNAME": "Thomas", "LNAME": "", "ADDRESS": "", "PHONE": "", "BIRTHDAY": ""}, "interests": {"bbbb369575": false, "97bbc1227a": false, "d802d794f8": false, "b35e48738e": false, "44d2c158e3": false, "29f73b8209": false, "2010f3c101": false, "75f1cb79fd": false, "aa2fd02c59": false, "f7b60a3c3d": false, "7733d60f61": false, "cc454d76d6": false, "797533254b": false, "9ea08b864b": false, "e2e5fdcac9": false, "8eccc648d6": false, "a7c814599e": false, "20ef45c5d3": false, "1824f5d1a5": false, "644f34517f": false, "c57e1a9ff6": false, "b97fee61c8": false, "b9d16768e3": false, "810348679c": false, "43ebb04472": false, "73ee7c1d1b": false, "045738fa17": false, "0a7cbd4449": false, "fef00a4695": false, "4a19201dc9": false, "571a80ed60": false}, "stats": {"avg_open_rate": 1, "avg_click_rate": 1}, "ip_signup": "", "timestamp_signup": "", "ip_opt": "93.73.161.112", "timestamp_opt": "2022-12-27T08:34:39+00:00", "member_rating": 2, "last_changed": "2023-11-03T20:53:12+00:00", "language": "", "vip": false, "email_client": "", "location": {"latitude": 0, "longitude": 0, "gmtoff": 0, "dstoff": 0, "country_code": "", "timezone": "", "region": ""}, "source": "Import", "tags_count": 0, "tags": [], "list_id": "16d6ec4ffc"}, "emitted_at": 1699302001460} {"stream": "lists", "data": {"id": "16d6ec4ffc", "web_id": 903380, "name": "Airbyte", "contact": {"company": "Airbyte", "address1": "kyiv", "address2": "", "city": "Kiev", "state": "30", "zip": "04200", "country": "UA", "phone": ""}, "permission_reminder": "You are receiving this email because you opted in via our website.", "use_archive_bar": true, "campaign_defaults": {"from_name": "yurii", "from_email": "integration-test+yurii@airbyte.io", "subject": "", "language": "en"}, "notify_on_subscribe": "", "notify_on_unsubscribe": "", "date_created": "2022-12-27T07:56:47+00:00", "list_rating": 0, "email_type_option": false, "subscribe_url_short": 
"http://eepurl.com/ihg3RD", "subscribe_url_long": "https://airbyte.us10.list-manage.com/subscribe?u=caf9055242d41edd9215d1898&id=16d6ec4ffc", "beamer_address": "us10-d527bd96ba-6d1a9988db@inbound.mailchimp.com", "visibility": "prv", "double_optin": false, "has_welcome": false, "marketing_permissions": false, "modules": [], "stats": {"member_count": 47, "unsubscribe_count": 4, "cleaned_count": 0, "member_count_since_send": 0, "unsubscribe_count_since_send": 1, "cleaned_count_since_send": 0, "campaign_count": 6, "campaign_last_sent": "2022-12-27T08:37:53+00:00", "merge_field_count": 5, "avg_sub_rate": 0, "avg_unsub_rate": 1, "target_sub_rate": 1, "open_rate": 100, "click_rate": 64.70588235294117, "last_sub_date": "2022-12-27T08:34:39+00:00", "last_unsub_date": "2023-11-06T20:18:01+00:00"}, "_links": [{"rel": "self", "href": "https://us10.api.mailchimp.com/3.0/lists/16d6ec4ffc", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Lists/Response.json"}, {"rel": "parent", "href": "https://us10.api.mailchimp.com/3.0/lists", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Lists/CollectionResponse.json", "schema": "https://us10.api.mailchimp.com/schema/3.0/Paths/Lists/Collection.json"}, {"rel": "update", "href": "https://us10.api.mailchimp.com/3.0/lists/16d6ec4ffc", "method": "PATCH", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Lists/Response.json", "schema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Lists/PATCH.json"}, {"rel": "batch-sub-unsub-members", "href": "https://us10.api.mailchimp.com/3.0/lists/16d6ec4ffc", "method": "POST", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Lists/BatchPOST-Response.json", "schema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Lists/BatchPOST.json"}, {"rel": "delete", "href": "https://us10.api.mailchimp.com/3.0/lists/16d6ec4ffc", "method": "DELETE"}, {"rel": "abuse-reports", "href": 
"https://us10.api.mailchimp.com/3.0/lists/16d6ec4ffc/abuse-reports", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Lists/Abuse/CollectionResponse.json", "schema": "https://us10.api.mailchimp.com/schema/3.0/Paths/Lists/Abuse/Collection.json"}, {"rel": "activity", "href": "https://us10.api.mailchimp.com/3.0/lists/16d6ec4ffc/activity", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Lists/Activity/Response.json"}, {"rel": "clients", "href": "https://us10.api.mailchimp.com/3.0/lists/16d6ec4ffc/clients", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Lists/Clients/Response.json"}, {"rel": "growth-history", "href": "https://us10.api.mailchimp.com/3.0/lists/16d6ec4ffc/growth-history", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Lists/Growth/CollectionResponse.json", "schema": "https://us10.api.mailchimp.com/schema/3.0/Paths/Lists/Growth/Collection.json"}, {"rel": "interest-categories", "href": "https://us10.api.mailchimp.com/3.0/lists/16d6ec4ffc/interest-categories", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Lists/InterestCategories/CollectionResponse.json", "schema": "https://us10.api.mailchimp.com/schema/3.0/Paths/Lists/InterestCategories/Collection.json"}, {"rel": "members", "href": "https://us10.api.mailchimp.com/3.0/lists/16d6ec4ffc/members", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Lists/Members/CollectionResponse.json", "schema": "https://us10.api.mailchimp.com/schema/3.0/Paths/Lists/Members/Collection.json"}, {"rel": "merge-fields", "href": "https://us10.api.mailchimp.com/3.0/lists/16d6ec4ffc/merge-fields", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Lists/MergeFields/CollectionResponse.json", "schema": 
"https://us10.api.mailchimp.com/schema/3.0/Paths/Lists/MergeFields/Collection.json"}, {"rel": "segments", "href": "https://us10.api.mailchimp.com/3.0/lists/16d6ec4ffc/segments", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Lists/Segments/CollectionResponse.json", "schema": "https://us10.api.mailchimp.com/schema/3.0/Paths/Lists/Segments/Collection.json"}, {"rel": "webhooks", "href": "https://us10.api.mailchimp.com/3.0/lists/16d6ec4ffc/webhooks", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Lists/Webhooks/CollectionResponse.json", "schema": "https://us10.api.mailchimp.com/schema/3.0/Paths/Lists/Webhooks/Collection.json"}, {"rel": "signup-forms", "href": "https://us10.api.mailchimp.com/3.0/lists/16d6ec4ffc/signup-forms", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Lists/SignupForms/CollectionResponse.json", "schema": "https://us10.api.mailchimp.com/schema/3.0/Paths/Lists/SignupForms/Collection.json"}, {"rel": "locations", "href": "https://us10.api.mailchimp.com/3.0/lists/16d6ec4ffc/locations", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Lists/Locations/CollectionResponse.json", "schema": "https://us10.api.mailchimp.com/schema/3.0/Paths/Lists/Locations/Collection.json"}]}, "emitted_at": 1699626450570} {"stream": "reports", "data": {"id": "a79651273b", "campaign_title": "Untitled", "type": "regular", "list_id": "16d6ec4ffc", "list_is_active": true, "list_name": "Airbyte", "subject_line": "Airbyte Test", "preview_text": "", "emails_sent": 50, "abuse_reports": 0, "unsubscribed": 0, "send_time": "2022-12-27T08:36:55+00:00", "bounces": {"hard_bounces": 0, "soft_bounces": 0, "syntax_errors": 0}, "forwards": {"forwards_count": 0, "forwards_opens": 0}, "opens": {"opens_total": 412, "unique_opens": 50, "open_rate": 1, "last_open": "2023-01-09T10:07:54+00:00"}, "clicks": {"clicks_total": 48, "unique_clicks": 
47, "unique_subscriber_clicks": 33, "click_rate": 0.66, "last_click": "2022-12-27T15:28:11+00:00"}, "facebook_likes": {"recipient_likes": 0, "unique_likes": 0, "facebook_likes": 0}, "list_stats": {"sub_rate": 0, "unsub_rate": 1, "open_rate": 100, "click_rate": 64.70588235294117}, "timeseries": [{"timestamp": "2022-12-27T08:00:00+00:00", "emails_sent": 50, "unique_opens": 6, "recipients_clicks": 1}, {"timestamp": "2022-12-27T09:00:00+00:00", "emails_sent": 0, "unique_opens": 43, "recipients_clicks": 0}, {"timestamp": "2022-12-27T10:00:00+00:00", "emails_sent": 0, "unique_opens": 1, "recipients_clicks": 3}, {"timestamp": "2022-12-27T11:00:00+00:00", "emails_sent": 0, "unique_opens": 0, "recipients_clicks": 11}, {"timestamp": "2022-12-27T12:00:00+00:00", "emails_sent": 0, "unique_opens": 0, "recipients_clicks": 10}, {"timestamp": "2022-12-27T13:00:00+00:00", "emails_sent": 0, "unique_opens": 0, "recipients_clicks": 3}, {"timestamp": "2022-12-27T14:00:00+00:00", "emails_sent": 0, "unique_opens": 0, "recipients_clicks": 2}, {"timestamp": "2022-12-27T15:00:00+00:00", "emails_sent": 0, "unique_opens": 0, "recipients_clicks": 3}, {"timestamp": "2022-12-27T16:00:00+00:00", "emails_sent": 0, "unique_opens": 0, "recipients_clicks": 0}, {"timestamp": "2022-12-27T17:00:00+00:00", "emails_sent": 0, "unique_opens": 0, "recipients_clicks": 0}, {"timestamp": "2022-12-27T18:00:00+00:00", "emails_sent": 0, "unique_opens": 0, "recipients_clicks": 0}, {"timestamp": "2022-12-27T19:00:00+00:00", "emails_sent": 0, "unique_opens": 0, "recipients_clicks": 0}, {"timestamp": "2022-12-27T20:00:00+00:00", "emails_sent": 0, "unique_opens": 0, "recipients_clicks": 0}, {"timestamp": "2022-12-27T21:00:00+00:00", "emails_sent": 0, "unique_opens": 0, "recipients_clicks": 0}, {"timestamp": "2022-12-27T22:00:00+00:00", "emails_sent": 0, "unique_opens": 0, "recipients_clicks": 0}, {"timestamp": "2022-12-27T23:00:00+00:00", "emails_sent": 0, "unique_opens": 0, "recipients_clicks": 0}, {"timestamp": 
"2022-12-28T00:00:00+00:00", "emails_sent": 0, "unique_opens": 0, "recipients_clicks": 0}, {"timestamp": "2022-12-28T01:00:00+00:00", "emails_sent": 0, "unique_opens": 0, "recipients_clicks": 0}, {"timestamp": "2022-12-28T02:00:00+00:00", "emails_sent": 0, "unique_opens": 0, "recipients_clicks": 0}, {"timestamp": "2022-12-28T03:00:00+00:00", "emails_sent": 0, "unique_opens": 0, "recipients_clicks": 0}, {"timestamp": "2022-12-28T04:00:00+00:00", "emails_sent": 0, "unique_opens": 0, "recipients_clicks": 0}, {"timestamp": "2022-12-28T05:00:00+00:00", "emails_sent": 0, "unique_opens": 0, "recipients_clicks": 0}, {"timestamp": "2022-12-28T06:00:00+00:00", "emails_sent": 0, "unique_opens": 0, "recipients_clicks": 0}, {"timestamp": "2022-12-28T07:00:00+00:00", "emails_sent": 0, "unique_opens": 0, "recipients_clicks": 0}], "ecommerce": {"total_orders": 0, "total_spent": 0, "total_revenue": 0, "currency_code": "USD"}, "delivery_status": {"enabled": false}, "_links": [{"rel": "parent", "href": "https://us10.api.mailchimp.com/3.0/reports", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Reports/CollectionResponse.json", "schema": "https://us10.api.mailchimp.com/schema/3.0/Paths/Reports/Collection.json"}, {"rel": "self", "href": "https://us10.api.mailchimp.com/3.0/reports/a79651273b", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Reports/Response.json"}, {"rel": "campaign", "href": "https://us10.api.mailchimp.com/3.0/campaigns/a79651273b", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Campaigns/Response.json"}, {"rel": "sub-reports", "href": "https://us10.api.mailchimp.com/3.0/reports/a79651273b/sub-reports", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Reports/Sub/Response.json"}, {"rel": "abuse-reports", "href": "https://us10.api.mailchimp.com/3.0/reports/a79651273b/abuse-reports", "method": "GET", 
"targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Reports/Abuse/CollectionResponse.json"}, {"rel": "advice", "href": "https://us10.api.mailchimp.com/3.0/reports/a79651273b/advice", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Reports/Advice/Response.json"}, {"rel": "open-details", "href": "https://us10.api.mailchimp.com/3.0/reports/a79651273b/open-details", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Reports/OpenDetails/CollectionResponse.json"}, {"rel": "click-details", "href": "https://us10.api.mailchimp.com/3.0/reports/a79651273b/click-details", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Reports/ClickDetails/CollectionResponse.json"}, {"rel": "domain-performance", "href": "https://us10.api.mailchimp.com/3.0/reports/a79651273b/domain-performance", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Reports/DomainPerformance/Response.json"}, {"rel": "eepurl", "href": "https://us10.api.mailchimp.com/3.0/reports/a79651273b/eepurl", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Reports/Eepurl/CollectionResponse.json"}, {"rel": "email-activity", "href": "https://us10.api.mailchimp.com/3.0/reports/a79651273b/email-activity", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Reports/EmailActivity/CollectionResponse.json"}, {"rel": "locations", "href": "https://us10.api.mailchimp.com/3.0/reports/a79651273b/locations", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Reports/Locations/Response.json"}, {"rel": "sent-to", "href": "https://us10.api.mailchimp.com/3.0/reports/a79651273b/sent-to", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Reports/SentTo/CollectionResponse.json"}, {"rel": "unsubscribed", "href": 
"https://us10.api.mailchimp.com/3.0/reports/a79651273b/unsubscribed", "method": "GET", "targetSchema": "https://us10.api.mailchimp.com/schema/3.0/Definitions/Reports/Unsubs/CollectionResponse.json"}]}, "emitted_at": 1699627079113} +{"stream": "segment_members", "data": {"id": "b79e1a05afb84190ec55310c5ee3f27e", "email_address": "integration-test+Lori@airbyte.io", "unique_email_id": "44f96f7b6a", "email_type": "html", "status": "subscribed", "merge_fields": {"FNAME": "Lori", "LNAME": null, "ADDRESS": null, "PHONE": null, "BIRTHDAY": null}, "interests": {"bbbb369575": false, "97bbc1227a": false, "d802d794f8": false, "b35e48738e": false, "44d2c158e3": false, "29f73b8209": false, "2010f3c101": false, "75f1cb79fd": false, "aa2fd02c59": false, "f7b60a3c3d": false, "7733d60f61": false, "cc454d76d6": false, "797533254b": false, "9ea08b864b": false, "e2e5fdcac9": false, "8eccc648d6": false, "a7c814599e": false, "20ef45c5d3": false, "1824f5d1a5": false, "644f34517f": false, "c57e1a9ff6": false, "b97fee61c8": false, "b9d16768e3": false, "810348679c": false, "43ebb04472": false, "73ee7c1d1b": false, "045738fa17": false, "0a7cbd4449": false, "fef00a4695": false, "4a19201dc9": false, "571a80ed60": false}, "stats": {"avg_open_rate": 1, "avg_click_rate": 1}, "ip_signup": null, "timestamp_signup": null, "ip_opt": "93.73.161.112", "timestamp_opt": "2022-12-27T08:34:39+00:00", "member_rating": 2, "last_changed": "2022-12-27T08:34:39+00:00", "language": null, "vip": false, "email_client": null, "location": {"latitude": 0, "longitude": 0, "gmtoff": 0, "dstoff": 0, "country_code": null, "timezone": null}, "list_id": "16d6ec4ffc", "segment_id": 14351128}, "emitted_at": 1700762624968} +{"stream": "segment_members", "data": {"id": "0119f706aa9a1ec9757d852743033a2d", "email_address": "integration-test+Susan@airbyte.io", "unique_email_id": "b30576500a", "email_type": "html", "status": "subscribed", "merge_fields": {"FNAME": "Susan", "LNAME": null, "ADDRESS": null, "PHONE": null, "BIRTHDAY": 
null}, "interests": {"bbbb369575": false, "97bbc1227a": false, "d802d794f8": false, "b35e48738e": false, "44d2c158e3": false, "29f73b8209": false, "2010f3c101": false, "75f1cb79fd": false, "aa2fd02c59": false, "f7b60a3c3d": false, "7733d60f61": false, "cc454d76d6": false, "797533254b": false, "9ea08b864b": false, "e2e5fdcac9": false, "8eccc648d6": false, "a7c814599e": false, "20ef45c5d3": false, "1824f5d1a5": false, "644f34517f": false, "c57e1a9ff6": false, "b97fee61c8": false, "b9d16768e3": false, "810348679c": false, "43ebb04472": false, "73ee7c1d1b": false, "045738fa17": false, "0a7cbd4449": false, "fef00a4695": false, "4a19201dc9": false, "571a80ed60": false}, "stats": {"avg_open_rate": 1, "avg_click_rate": 1}, "ip_signup": null, "timestamp_signup": null, "ip_opt": "93.73.161.112", "timestamp_opt": "2022-12-27T08:34:39+00:00", "member_rating": 2, "last_changed": "2022-12-27T08:34:39+00:00", "language": null, "vip": false, "email_client": null, "location": {"latitude": 0, "longitude": 0, "gmtoff": 0, "dstoff": 0, "country_code": null, "timezone": null}, "list_id": "16d6ec4ffc", "segment_id": 14351128}, "emitted_at": 1700762624967} {"stream": "segments", "data": {"id": 13506132, "name": "Influencer", "member_count": 3, "type": "static", "created_at": "2022-12-27T08:33:35+00:00", "updated_at": "2022-12-27T08:33:35+00:00", "list_id": "16d6ec4ffc"}, "emitted_at": 1699302003309} {"stream": "tags", "data": {"id": 13506128, "name": "2022", "list_id": "16d6ec4ffc"}, "emitted_at": 1699963804499} {"stream": "unsubscribes", "data": {"email_id": "11273c9a5dc6ae6c5aaccfb77b2addfb", "email_address": "AirbyteMailchimpUser@gmail.com", "merge_fields": {"FNAME": "Joe", "LNAME": "Barry", "ADDRESS": {"addr1": "109 Barry St", "addr2": "", "city": "Gary", "state": "IN", "zip": "46401", "country": "US"}, "PHONE": "", "BIRTHDAY": ""}, "vip": false, "timestamp": "2023-11-06T20:18:01+00:00", "reason": "Did not signup for list", "campaign_id": "7847cdaeff", "list_id": "16d6ec4ffc", 
"list_is_active": true}, "emitted_at": 1699302005437} diff --git a/airbyte-integrations/connectors/source-mailchimp/integration_tests/segments.json b/airbyte-integrations/connectors/source-mailchimp/integration_tests/segments.json new file mode 100644 index 000000000000..ac6a59cb5e70 --- /dev/null +++ b/airbyte-integrations/connectors/source-mailchimp/integration_tests/segments.json @@ -0,0 +1,18 @@ +{ + "streams": [ + { + "stream": { + "name": "segment_members", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["last_changed"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "incremental", + "cursor_field": ["last_changed"], + "primary_key": [["id"]], + "destination_sync_mode": "append" + } + ] +} diff --git a/airbyte-integrations/connectors/source-mailchimp/integration_tests/state.json b/airbyte-integrations/connectors/source-mailchimp/integration_tests/state.json index 3cc3a67b4573..5f933afe83d6 100644 --- a/airbyte-integrations/connectors/source-mailchimp/integration_tests/state.json +++ b/airbyte-integrations/connectors/source-mailchimp/integration_tests/state.json @@ -45,6 +45,29 @@ "stream_descriptor": { "name": "reports" } } }, + { + "type": "STREAM", + "stream": { + "stream_state": { + "16d6ec4ffc": { "last_changed": "2230-02-26T05:42:10+00:00" } + }, + "stream_descriptor": { "name": "segment_members" } + } + }, + { + "type": "STREAM", + "stream": { + "stream_state": { + "13506120": {"last_changed": "2222-12-27T08:34:39+00:00"}, + "13506136": {"last_changed": "2222-12-27T08:34:39+00:00"}, + "14351124": {"last_changed": "2222-12-27T08:34:39+00:00"}, + "14351504": {"last_changed": "2222-12-27T07:56:47+00:00"}, + "14351128": {"last_changed": "2222-12-27T08:34:39+00:00"}, + "13506132": {"last_changed": "2222-12-27T08:34:39+00:00"} + }, + "stream_descriptor": { "name": "segment_members" } + } + }, { "type": "STREAM", "stream": { diff --git 
a/airbyte-integrations/connectors/source-mailchimp/metadata.yaml b/airbyte-integrations/connectors/source-mailchimp/metadata.yaml index aec91d9fecba..349c6426a418 100644 --- a/airbyte-integrations/connectors/source-mailchimp/metadata.yaml +++ b/airbyte-integrations/connectors/source-mailchimp/metadata.yaml @@ -10,7 +10,7 @@ data: connectorSubtype: api connectorType: source definitionId: b03a9f3e-22a5-11eb-adc1-0242ac120002 - dockerImageTag: 0.9.0 + dockerImageTag: 0.10.0 dockerRepository: airbyte/source-mailchimp documentationUrl: https://docs.airbyte.com/integrations/sources/mailchimp githubIssueLabel: source-mailchimp diff --git a/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/schemas/segment_members.json b/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/schemas/segment_members.json new file mode 100644 index 000000000000..3c760dfdeb43 --- /dev/null +++ b/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/schemas/segment_members.json @@ -0,0 +1,117 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": true, + "properties": { + "id": { + "type": ["null", "string"] + }, + "email_address": { + "type": ["null", "string"] + }, + "unique_email_id": { + "type": ["null", "string"] + }, + "email_type": { + "type": ["null", "string"] + }, + "status": { + "type": ["null", "string"] + }, + "merge_fields": { + "type": ["null", "object"], + "additionalProperties": true + }, + "interests": { + "type": ["null", "object"], + "additionalProperties": true + }, + "stats": { + "type": ["null", "object"], + "properties": { + "avg_open_rate": { + "type": ["null", "number"] + }, + "avg_click_rate": { + "type": ["null", "number"] + } + } + }, + "ip_signup": { + "type": ["null", "string"] + }, + "timestamp_signup": { + "type": ["null", "string"], + "format": "date-time" + }, + "ip_opt": { + "type": ["null", "string"] + }, + "timestamp_opt": { + "type": ["null", "string"] + }, + 
"member_rating": { + "type": ["null", "integer"] + }, + "last_changed": { + "type": ["null", "string"], + "format": "date-time" + }, + "language": { + "type": ["null", "string"] + }, + "vip": { + "type": ["null", "boolean"] + }, + "email_client": { + "type": ["null", "string"] + }, + "location": { + "type": ["null", "object"], + "properties": { + "latitude": { + "type": ["null", "number"] + }, + "longitude": { + "type": ["null", "number"] + }, + "gmtoff": { + "type": ["null", "integer"] + }, + "dstoff": { + "type": ["null", "integer"] + }, + "country_code": { + "type": ["null", "string"] + }, + "timezone": { + "type": ["null", "string"] + } + } + }, + "last_note": { + "type": ["null", "object"], + "properties": { + "note_id": { + "type": ["null", "integer"] + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "created_by": { + "type": ["null", "string"] + }, + "note": { + "type": ["null", "string"] + } + } + }, + "list_id": { + "type": ["null", "string"] + }, + "segment_id": { + "type": ["null", "integer"] + } + } +} diff --git a/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/source.py b/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/source.py index 4c688d2f557e..de12e5b8e2f5 100644 --- a/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/source.py +++ b/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/source.py @@ -22,6 +22,7 @@ ListMembers, Lists, Reports, + SegmentMembers, Segments, Tags, Unsubscribes, @@ -114,6 +115,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: lists, ListMembers(authenticator=authenticator), Reports(authenticator=authenticator), + SegmentMembers(authenticator=authenticator), Segments(authenticator=authenticator), Tags(authenticator=authenticator, parent=lists), Unsubscribes(authenticator=authenticator, campaign_id=campaign_id), diff --git a/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/streams.py 
b/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/streams.py index 1f3d1c63b394..8206c8f77124 100644 --- a/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/streams.py +++ b/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/streams.py @@ -345,6 +345,94 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp yield self.remove_empty_datetime_fields(record) +class SegmentMembers(MailChimpListSubStream): + """ + Get information about members in a specific segment. + Docs link: https://mailchimp.com/developer/marketing/api/list-segment-members/list-members-in-segment/ + """ + + cursor_field = "last_changed" + data_field = "members" + + def nullify_empty_string_fields(self, element: Mapping[str, Any]) -> Mapping[str, Any]: + """ + SegmentMember records may contain multiple fields that are returned as empty strings, which causes validation issues for fields with declared "datetime" formats. + Since all fields are nullable, replacing any string value of "" with None is a safe way to handle these edge cases. + + :param element: A SegmentMember record, dictionary or list + """ + + if isinstance(element, dict): + # If the element is a dictionary, apply the method recursively to each value, + # replacing the empty string value with None. + element = {k: self.nullify_empty_string_fields(v) if v != "" else None for k, v in element.items()} + elif isinstance(element, list): + # If the element is a list, apply the method recursively to each item in the list.
+ element = [self.nullify_empty_string_fields(v) for v in element] + + return element + + def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]: + """ + Each slice consists of a list_id and segment_id pair + """ + segments_slices = Segments(authenticator=self.authenticator).stream_slices(sync_mode=SyncMode.full_refresh) + + for segments_slice in segments_slices: + segment_records = Segments(authenticator=self.authenticator).read_records(sync_mode=SyncMode.full_refresh, stream_slice=segments_slice) + + for segment in segment_records: + yield {"list_id": segment["list_id"], "segment_id": segment["id"]} + + def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: + """ + Members are fetched per segment, addressed by the list_id and segment_id of the current slice + """ + list_id = stream_slice.get("list_id") + segment_id = stream_slice.get("segment_id") + return f"lists/{list_id}/segments/{segment_id}/members" + + def parse_response(self, response: requests.Response, stream_state: Mapping[str, Any], stream_slice, **kwargs) -> Iterable[Mapping]: + """ + SegmentMembers endpoint does not support sorting, so we need to filter out records that are older than the current state. + Records without a cursor value cannot be compared against the state and are always emitted. + """ + response = super().parse_response(response, **kwargs) + + for record in response: + # Add the segment_id foreign_key to each record + record["segment_id"] = stream_slice.get("segment_id") + # Nullify empty strings before filtering so that an empty cursor value is handled like a missing one + record = self.nullify_empty_string_fields(record) + + current_cursor_value = (stream_state or {}).get(str(record.get("segment_id")), {}).get(self.cursor_field) + record_cursor_value = record.get(self.cursor_field) + if current_cursor_value is None or record_cursor_value is None or record_cursor_value >= current_cursor_value: + yield record + + def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]: + """ + State is kept per segment as {segment_id: {cursor_field: value}}, holding the greatest cursor value seen for that segment. + A record without a cursor value leaves the state unchanged. + """ + current_stream_state = current_stream_state or {} + segment_id = str(latest_record.get("segment_id")) + latest_cursor_value = latest_record.get(self.cursor_field) + + # A missing cursor value cannot advance the state, and max() raises a TypeError when given None + if latest_cursor_value is None: + return current_stream_state + + # Get the current state value for this segment, if it exists + segment_state = current_stream_state.get(segment_id, 
{}) + current_cursor_value = segment_state.get(self.cursor_field) or latest_cursor_value + + # Update the cursor value and set it in state + updated_cursor_value = max(current_cursor_value, latest_cursor_value) + current_stream_state[segment_id] = {self.cursor_field: updated_cursor_value} + return current_stream_state + + class Segments(MailChimpListSubStream): """ Get information about all available segments for a specific list. diff --git a/airbyte-integrations/connectors/source-mailchimp/unit_tests/test_source.py b/airbyte-integrations/connectors/source-mailchimp/unit_tests/test_source.py index ed5d06630c21..92be6ee481b5 100644 --- a/airbyte-integrations/connectors/source-mailchimp/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-mailchimp/unit_tests/test_source.py @@ -90,4 +90,4 @@ def test_wrong_config(wrong_config): def test_streams_count(config): streams = SourceMailchimp().streams(config) - assert len(streams) == 11 + assert len(streams) == 12 diff --git a/airbyte-integrations/connectors/source-mailchimp/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-mailchimp/unit_tests/test_streams.py index e39f27b0aa70..47bc7028f5f8 100644 --- a/airbyte-integrations/connectors/source-mailchimp/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-mailchimp/unit_tests/test_streams.py @@ -19,6 +19,7 @@ ListMembers, Lists, Reports, + SegmentMembers, Segments, Tags, Unsubscribes, @@ -216,11 +217,25 @@ def test_list_child_request_params(auth, stream_class, stream_slice, stream_stat {"list_id": "list_2", "last_changed": "2023-10-14T00:00:00Z"}, {"list_1": {"last_changed": "2023-10-15T00:00:00Z"}, "list_2": {"last_changed": "2023-10-15T00:00:00Z"}}, ), + ( + SegmentMembers, + {"segment_1": {"last_changed": "2023-10-15T00:00:00Z"}, "segment_2": {"last_changed": "2023-10-15T00:00:00Z"}}, + {"segment_id": "segment_1", "last_changed": "2023-10-16T00:00:00Z"}, + {"segment_1": {"last_changed": "2023-10-16T00:00:00Z"}, 
"segment_2": {"last_changed": "2023-10-15T00:00:00Z"}}, + ), + ( + SegmentMembers, + {"segment_1": {"last_changed": "2023-10-15T00:00:00Z"}}, + {"segment_id": "segment_2", "last_changed": "2023-10-16T00:00:00Z"}, + {"segment_1": {"last_changed": "2023-10-15T00:00:00Z"}, "segment_2": {"last_changed": "2023-10-16T00:00:00Z"}}, + ) ], ids=[ "Segments: no current_stream_state", "Segments: latest_record's cursor > than current_stream_state for list_1", "ListMembers: latest_record's cursor < current_stream_state for list_2", + "SegmentMembers: latest_record's cursor > current_stream_state for segment_1", + "SegmentMembers: no stream_state for current slice, new slice added to state" ], ) def test_list_child_get_updated_state(auth, stream_class, current_stream_state, latest_record, expected_state): @@ -233,6 +248,87 @@ def test_list_child_get_updated_state(auth, stream_class, current_stream_state, assert updated_state == expected_state +@pytest.mark.parametrize( + "stream_state, records, expected", + [ + # Test case 1: No stream state, all records should be yielded + ( + {}, + {"members": [ + {"id": 1, "segment_id": "segment_1", "last_changed": "2021-01-01T00:00:00Z"}, + {"id": 2, "segment_id": "segment_1", "last_changed": "2021-01-02T00:00:00Z"} + ]}, + [ + {"id": 1, "segment_id": "segment_1", "last_changed": "2021-01-01T00:00:00Z"}, + {"id": 2, "segment_id": "segment_1", "last_changed": "2021-01-02T00:00:00Z"} + ] + ), + + # Test case 2: Records older than stream state should be filtered out + ( + {"segment_1": {"last_changed": "2021-02-01T00:00:00Z"}}, + {"members": [ + {"id": 1, "segment_id": "segment_1", "last_changed": "2021-01-01T00:00:00Z"}, + {"id": 2, "segment_id": "segment_1", "last_changed": "2021-03-01T00:00:00Z"} + ]}, + [{"id": 2, "segment_id": "segment_1", "last_changed": "2021-03-01T00:00:00Z"}] + ), + + # Test case 3: Two lists in stream state, only state for segment_id_1 determines filtering + ( + {"segment_1": {"last_changed": "2021-01-02T00:00:00Z"}, 
"segment_2": {"last_changed": "2022-01-01T00:00:00Z"}}, + {"members": [ + {"id": 1, "segment_id": "segment_1", "last_changed": "2021-01-01T00:00:00Z"}, + {"id": 2, "segment_id": "segment_1", "last_changed": "2021-03-01T00:00:00Z"} + ]}, + [{"id": 2, "segment_id": "segment_1", "last_changed": "2021-03-01T00:00:00Z"}] + ), + ], + ids=[ + "No stream state, all records should be yielded", + "Record < stream state, should be filtered out", + "Record >= stream state, should be yielded", + ] +) +def test_segment_members_parse_response(auth, stream_state, records, expected): + segment_members_stream = SegmentMembers(authenticator=auth) + response = MagicMock() + response.json.return_value = records + parsed_records = list(segment_members_stream.parse_response(response, stream_state, stream_slice={"segment_id": "segment_1"})) + assert parsed_records == expected, f"Expected: {expected}, Actual: {parsed_records}" + + +@pytest.mark.parametrize( + "record, expected_record", + [ + ( + {"id": 1, "email_address": "a@gmail.com", "email_type": "html", "opt_timestamp": ""}, + {"id": 1, "email_address": "a@gmail.com", "email_type": "html", "opt_timestamp": None} + ), + ( + {"id": 1, "email_address": "a@gmail.com", "email_type": "html", "opt_timestamp": "2022-01-01T00:00:00.000Z", "merge_fields": {"FNAME": "Bob", "LNAME": "", "ADDRESS": "", "PHONE": ""}}, + {"id": 1, "email_address": "a@gmail.com", "email_type": "html", "opt_timestamp": "2022-01-01T00:00:00.000Z", "merge_fields": {"FNAME": "Bob", "LNAME": None, "ADDRESS": None, "PHONE": None}} + ), + ( + {"id": 1, "email_address": "a@gmail.com", "email_type": "html", "opt_timestamp": "2022-01-01T00:00:00.000Z", "merge_fields": {"FNAME": "Bob", "LNAME": "Bobson", "ADDRESS": "101 Bob Ln", "PHONE": "111-111-1111"}}, + {"id": 1, "email_address": "a@gmail.com", "email_type": "html", "opt_timestamp": "2022-01-01T00:00:00.000Z", "merge_fields": {"FNAME": "Bob", "LNAME": "Bobson", "ADDRESS": "101 Bob Ln", "PHONE": "111-111-1111"}} + ) + ], + 
ids=[ + "Replace empty string with None", + "Replace empty strings with None in nested fields", + "Leave non-empty string fields unchanged" + ] +) +def test_segment_members_nullify_empty_string_fields(auth, record, expected_record): + """ + Tests that empty string values in SegmentMembers stream are converted to None + """ + stream = SegmentMembers(authenticator=auth) + + assert stream.nullify_empty_string_fields(record) == expected_record + + def test_unsubscribes_stream_slices(requests_mock, unsubscribes_stream, campaigns_stream, mock_campaigns_response): campaigns_url = campaigns_stream.url_base + campaigns_stream.path() requests_mock.register_uri("GET", campaigns_url, json={"campaigns": mock_campaigns_response}) @@ -460,6 +556,7 @@ def test_403_error_handling( (Interests, {"parent": {"list_id": "123", "id": "456"}}, "lists/123/interest-categories/456/interests"), (ListMembers, {"list_id": "123"}, "lists/123/members"), (Reports, {}, "reports"), + (SegmentMembers, {"list_id": "123", "segment_id": "456"}, "lists/123/segments/456/members"), (Segments, {"list_id": "123"}, "lists/123/segments"), (Tags, {"parent": {"id": "123"}}, "lists/123/tag-search"), (Unsubscribes, {"campaign_id": "123"}, "reports/123/unsubscribed"), @@ -473,6 +570,7 @@ def test_403_error_handling( "Interests", "ListMembers", "Reports", + "SegmentMembers", "Segments", "Tags", "Unsubscribes", diff --git a/docs/integrations/sources/mailchimp.md b/docs/integrations/sources/mailchimp.md index 9106108da3fd..4fa227cccc46 100644 --- a/docs/integrations/sources/mailchimp.md +++ b/docs/integrations/sources/mailchimp.md @@ -28,12 +28,6 @@ The Mailchimp source connector supports the following [sync modes](https://docs. - Full Refresh - Incremental -Airbyte doesn't support Incremental Deletes for the `Campaigns`, `Lists`, and `Email Activity` streams because Mailchimp doesn't provide any information about deleted data in these streams. 
- -## Performance considerations - -[Mailchimp does not impose rate limits](https://mailchimp.com/developer/guides/marketing-api-conventions/#throttling) on how much data is read from its API in a single sync process. However, Mailchimp enforces a maximum of 10 simultaneous connections to its API, which means that Airbyte is unable to run more than 10 concurrent syncs from Mailchimp using API keys generated from the same account. - ## Supported streams The Mailchimp source connector supports the following streams: @@ -42,11 +36,12 @@ The Mailchimp source connector supports the following streams: [Campaigns](https://mailchimp.com/developer/marketing/api/campaigns/get-campaign-info/) [Email Activity](https://mailchimp.com/developer/marketing/api/email-activity-reports/list-email-activity/) [Interests](https://mailchimp.com/developer/marketing/api/interests/list-interests-in-category/) -[InterestCategories](https://mailchimp.com/developer/marketing/api/interest-categories/list-interest-categories/) +[Interest Categories](https://mailchimp.com/developer/marketing/api/interest-categories/list-interest-categories/) [Lists](https://mailchimp.com/developer/api/marketing/lists/get-list-info) -[ListMembers](https://mailchimp.com/developer/marketing/api/list-members/list-members-info/) +[List Members](https://mailchimp.com/developer/marketing/api/list-members/list-members-info/) [Reports](https://mailchimp.com/developer/marketing/api/reports/list-campaign-reports/) [Segments](https://mailchimp.com/developer/marketing/api/list-segments/list-segments/) +[Segment Members](https://mailchimp.com/developer/marketing/api/list-segment-members/list-members-in-segment/) [Tags](https://mailchimp.com/developer/marketing/api/lists-tags-search/search-for-tags-on-a-list-by-name/) [Unsubscribes](https://mailchimp.com/developer/marketing/api/unsub-reports/list-unsubscribed-members/) @@ -69,6 +64,10 @@ All other streams contain an `id` primary key. 
| `object` | `object` | properties within objects are mapped based on the mappings in this table | | `string` | `string` | | +## Performance considerations + +[Mailchimp does not impose rate limits](https://mailchimp.com/developer/guides/marketing-api-conventions/#throttling) on how much data is read from its API in a single sync process. However, Mailchimp enforces a maximum of 10 simultaneous connections to its API, which means that Airbyte is unable to run more than 10 concurrent syncs from Mailchimp using API keys generated from the same account. + ## Tutorials Now that you have set up the Mailchimp source connector, check out the following Mailchimp tutorial: @@ -79,6 +78,7 @@ Now that you have set up the Mailchimp source connector, check out the following | Version | Date | Pull Request | Subject | |---------|------------|----------------------------------------------------------|----------------------------------------------------------------------------| +| 0.10.0 | 2023-11-23 | [32782](https://github.com/airbytehq/airbyte/pull/32782) | Add SegmentMembers stream | | 0.9.0 | 2023-11-17 | [32218](https://github.com/airbytehq/airbyte/pull/32218) | Add Interests, InterestCategories, Tags streams | | 0.8.3 | 2023-11-15 | [32543](https://github.com/airbytehq/airbyte/pull/32543) | Handle empty datetime fields in Reports stream | | 0.8.2 | 2023-11-13 | [32466](https://github.com/airbytehq/airbyte/pull/32466) | Improve error handling during connection check |