diff --git a/airbyte-integrations/connectors/source-instagram/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-instagram/integration_tests/expected_records.jsonl index 0a60185b5dbf..d90034355c7a 100644 --- a/airbyte-integrations/connectors/source-instagram/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-instagram/integration_tests/expected_records.jsonl @@ -1,7 +1,7 @@ -{"stream": "users", "data": {"id": "17841408147298757", "biography": "Airbyte is the new open-source data integration platform that consolidates your data into your warehouses.", "profile_picture_url": "https://scontent-iev1-1.xx.fbcdn.net/v/t51.2885-15/153169696_890787328349641_8382928081987798464_n.jpg?_nc_cat=111&_nc_sid=7d201b&_nc_ohc=uQq3P1OLNOYAX_JjmNN&_nc_ht=scontent-iev1-1.xx&edm=AL-3X8kEAAAA&oh=00_AfBI4aspXBrxU-bYTD-qnPWh7ex05YFFAOl_24u7JxLYrw&oe=6558D73E", "username": "airbytehq", "followers_count": 1253, "name": "Jean Lafleur", "ig_id": 8070063576, "media_count": 258, "follows_count": 14, "website": "https://www.airbyte.io/", "page_id": "144706962067225"}, "emitted_at": 1700004246764} -{"stream": "media", "data": {"id": "17884386203808767", "media_product_type": "REELS", "shortcode": "CtZs0Y3v2lx", "permalink": "https://www.instagram.com/reel/CtZs0Y3v2lx/", "media_url": "https://scontent-iev1-1.cdninstagram.com/o1/v/t16/f1/m82/B34BFFBB0614049AD69F066D153FDD8C_video_dashinit.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLmNsaXBzLnVua25vd24tQzMuNzIwLmRhc2hfYmFzZWxpbmVfMV92MSJ9&_nc_ht=scontent-iev1-1.cdninstagram.com&_nc_cat=107&vs=986202625710684_1200838240&_nc_vs=HBksFQIYT2lnX3hwdl9yZWVsc19wZXJtYW5lbnRfcHJvZC9CMzRCRkZCQjA2MTQwNDlBRDY5RjA2NkQxNTNGREQ4Q192aWRlb19kYXNoaW5pdC5tcDQVAALIAQAVAhg6cGFzc3Rocm91Z2hfZXZlcnN0b3JlL0dDQm9HQlV3a2JxUWwtY0JBRnZGTnFBUkdQeHpicV9FQUFBRhUCAsgBACgAGAAbAYgHdXNlX29pbAExFQAAJtDf4euHnbtAFQIoAkMzLBdAUBtDlYEGJRgSZGFzaF9iYXNlbGluZV8xX3YxEQB1AAA%3D&ccb=9-4&oh=00_AfAJ_4aiqzpxj20QL_aetXfmjmA8nRmz27vnAzpiARGK5w&oe=6555EADA&_nc_sid=1d576d", "timestamp": "2023-06-12T19:20:02+0000", "media_type": "VIDEO", "caption": "Terraform Explained Part 1\n.\n.\n.\n#airbyte #dataengineering #tech #terraform #cloud #cloudengineer #coding #reels", "comments_count": 2, "username": "airbytehq", "is_comment_enabled": true, "owner": {"id": "17841408147298757"}, "like_count": 9, "ig_id": "3123724930722523505", "thumbnail_url": "https://scontent-iev1-1.cdninstagram.com/v/t51.36329-15/353022694_609901831117241_2447211336606431614_n.jpg?_nc_cat=100&ccb=1-7&_nc_sid=c4dd86&_nc_ohc=1ZTHPkRhzl8AX-hZcw_&_nc_ht=scontent-iev1-1.cdninstagram.com&edm=AM6HXa8EAAAA&oh=00_AfAzQkg0OB_775OS9F7QSmHxKMrjBSNFi8Rx24OISWSTTQ&oe=655888CE", "page_id": "144706962067225", "business_account_id": "17841408147298757"}, "emitted_at": 1700004200656} -{"stream": "media", "data": {"id": "17864256500936159", "media_product_type": "REELS", "shortcode": "CscAR5EsRgA", "permalink": "https://www.instagram.com/reel/CscAR5EsRgA/", "media_url": "https://scontent-iev1-1.cdninstagram.com/o1/v/t16/f1/m82/BE4F848CC97FBA35A1AE1B1150B989A7_video_dashinit.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLmNsaXBzLnVua25vd24tQzMuNzIwLmRhc2hfYmFzZWxpbmVfMV92MSJ9&_nc_ht=scontent-iev1-1.cdninstagram.com&_nc_cat=110&vs=6290041361087047_1877877688&_nc_vs=HBksFQIYT2lnX3hwdl9yZWVsc19wZXJtYW5lbnRfcHJvZC9CRTRGODQ4Q0M5N0ZCQTM1QTFBRTFCMTE1MEI5ODlBN192aWRlb19kYXNoaW5pdC5tcDQVAALIAQAVAhg6cGFzc3Rocm91Z2hfZXZlcnN0b3JlL0dEaE94aFJJdk1BWGZaWURBQXQyS0FLWWxOSlhicV9FQUFBRhUCAsgBACgAGAAbAYgHdXNlX29pbAExFQAAJrD%2B6LaRwf1AFQIoAkMzLBdARDmZmZmZmhgSZGFzaF9iYXNlbGluZV8xX3YxEQB1AAA%3D&ccb=9-4&oh=00_AfB3i72i3aoV6KoK_SkI7W93z4rQLdbYHatg-KzPo0ADCg&oe=655556A6&_nc_sid=1d576d", "timestamp": "2023-05-19T20:08:33+0000", "media_type": "VIDEO", "caption": "When and why you should be using Rust for Data Engineering! \n\n#rust #airbyte #coding #programming #tech #dataengineering #data", "comments_count": 0, "username": "airbytehq", "is_comment_enabled": true, "owner": {"id": "17841408147298757"}, "like_count": 7, "ig_id": "3106359072491902976", "thumbnail_url": "https://scontent-iev1-1.cdninstagram.com/v/t51.36329-15/347441626_604256678433845_716271787932876577_n.jpg?_nc_cat=108&ccb=1-7&_nc_sid=c4dd86&_nc_ohc=2ACJfSHiIRkAX8S0ZFU&_nc_ht=scontent-iev1-1.cdninstagram.com&edm=AM6HXa8EAAAA&oh=00_AfDJSGXmZXkQnQZmkrVUi4nadhEddZxH5LUNtELipGu4Dw&oe=655947D9", "page_id": "144706962067225", "business_account_id": "17841408147298757"}, "emitted_at": 1700004200657} -{"stream": "media", "data": {"id": "17964324206288599", "media_product_type": "REELS", "shortcode": "CsUe2iqpQif", "permalink": "https://www.instagram.com/reel/CsUe2iqpQif/", "media_url": "https://scontent-iev1-1.cdninstagram.com/o1/v/t16/f1/m82/274503D36EA0F6E79A7CF3797A8D5985_video_dashinit.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLmNsaXBzLnVua25vd24tQzMuNTc2LmRhc2hfYmFzZWxpbmVfMV92MSJ9&_nc_ht=scontent-iev1-1.cdninstagram.com&_nc_cat=106&vs=1336282350269744_3931649106&_nc_vs=HBksFQIYT2lnX3hwdl9yZWVsc19wZXJtYW5lbnRfcHJvZC8yNzQ1MDNEMzZFQTBGNkU3OUE3Q0YzNzk3QThENTk4NV92aWRlb19kYXNoaW5pdC5tcDQVAALIAQAVAhg6cGFzc3Rocm91Z2hfZXZlcnN0b3JlL0dQdzNzaFRId3VlSlBFWURBSDFmTjUzcUNhd0JicV9FQUFBRhUCAsgBACgAGAAbAYgHdXNlX29pbAExFQAAJrDwmtqO44lAFQIoAkMzLBdAIewIMSbpeRgSZGFzaF9iYXNlbGluZV8xX3YxEQB1AAA%3D&ccb=9-4&oh=00_AfB1HP_4v5ndbtfq_6eWT0cxo0vqzO9F6mu5ZS-q4IRDzg&oe=65558FF2&_nc_sid=1d576d", "timestamp": "2023-05-16T22:01:45+0000", "media_type": "VIDEO", "caption": "We've all been there right? \ud83e\udd23\n\n#airbyte #data #dataengineering #datascience #dataanalytics #tech #softwareengineer", "comments_count": 0, "username": "airbytehq", "is_comment_enabled": true, "owner": {"id": "17841408147298757"}, "like_count": 13, "ig_id": "3104241732634871967", "thumbnail_url": "https://scontent-iev1-1.cdninstagram.com/v/t51.36329-15/347429218_1848940842145573_5975413208994727174_n.jpg?_nc_cat=101&ccb=1-7&_nc_sid=c4dd86&_nc_ohc=3He_36rMQuYAX9Pz0NM&_nc_ht=scontent-iev1-1.cdninstagram.com&edm=AM6HXa8EAAAA&oh=00_AfCAgX5HrHW8grC2x_VzJyCf2lUTViJCmwNy0uStHB-YFg&oe=6559347C", "page_id": "144706962067225", "business_account_id": "17841408147298757"}, "emitted_at": 1700004200657} -{"stream": "user_lifetime_insights", "data": {"page_id": "144706962067225", "business_account_id": "17841408147298757", "metric": "audience_city", "date": "2023-11-14T08:00:00+0000", "value": {"London, England": 8, "Sydney, New South Wales": 19, "Algiers, Algiers Province": 4, "Casablanca, Grand Casablanca": 4, "S\u00e3o Paulo, S\u00e3o Paulo (state)": 14, "Rio de Janeiro, Rio de Janeiro (state)": 5, "Hong Kong, Hong Kong": 4, "Berlin, Berlin": 7, "Kolkata, West Bengal": 4, "Phoenix, Arizona": 3, "Tulsa, Oklahoma": 4, "Lagos, Lagos State": 18, "Skopje, Municipality of Centar (Skopje)": 4, "Ahmedabad, Gujarat": 4, "Moscow, Moscow": 5, "Karachi, Sindh": 4, "Bogot\u00e1, Distrito Especial": 5, "Dar es Salaam, Dar es Salaam": 7, "Jakarta, Jakarta": 10, "Accra, Greater Accra Region": 4, "Buenos Aires, Ciudad Aut\u00f3noma de Buenos Aires": 9, "Melbourne, Victoria": 7, "Delhi, Delhi": 6, "Gurugram, Haryana": 6, "Kuala Lumpur, Kuala Lumpur": 4, "Los Angeles, California": 5, "Lima, Lima Region": 5, "Istanbul, Istanbul Province": 9, "Abuja, Federal Capital Territory": 5, "Chennai, Tamil Nadu": 6, "Bangkok, Bangkok": 5, "Mexico City, Distrito Federal": 7, "Cape Town, Western Cape": 5, "San Francisco, California": 6, "Greater Noida, Uttar Pradesh": 3, "Tehran, Tehran Province": 4, "New York, New York": 13, "Cairo, Cairo Governorate": 4, "Santiago, Santiago Metropolitan Region": 6, "Dubai, Dubai": 8, "Mumbai, Maharashtra": 8, "Bangalore, Karnataka": 18, "Singapore, Singapore": 6, "Hyderabad, Telangana": 7, "San Diego, California": 6}}, "emitted_at": 1700004246978} -{"stream": "user_lifetime_insights", "data": {"page_id": "144706962067225", "business_account_id": "17841408147298757", "metric": "audience_country", "date": "2023-11-14T08:00:00+0000", "value": {"DE": 31, "HK": 4, "FI": 5, "TW": 5, "RU": 9, "TZ": 8, "FR": 11, "SA": 8, "BR": 64, "SE": 6, "MA": 6, "SG": 6, "DZ": 6, "ID": 29, "GB": 45, "CA": 24, "US": 264, "GH": 4, "EG": 10, "AE": 9, "CH": 7, "IN": 125, "ZA": 16, "IQ": 6, "CL": 9, "IR": 12, "GR": 6, "IT": 19, "MX": 24, "MY": 9, "CO": 11, "ES": 12, "VE": 9, "AR": 23, "AT": 4, "TH": 7, "AU": 35, "PE": 5, "PH": 7, "NG": 30, "TN": 6, "PK": 10, "PL": 5, "TR": 10, "NL": 13}}, "emitted_at": 1700004246980} -{"stream": "user_lifetime_insights", "data": {"page_id": "144706962067225", "business_account_id": "17841408147298757", "metric": "audience_gender_age", "date": "2023-11-14T08:00:00+0000", "value": {"F.18-24": 11, "F.25-34": 75, "F.35-44": 72, "F.45-54": 17, "F.55-64": 1, "F.65+": 2, "M.13-17": 2, "M.18-24": 50, "M.25-34": 367, "M.35-44": 226, "M.45-54": 83, "M.55-64": 20, "M.65+": 12, "U.18-24": 18, "U.25-34": 67, "U.35-44": 43, "U.45-54": 19, "U.55-64": 5}}, "emitted_at": 1700004246980} +{"stream": "users", "data": {"id": "17841408147298757", "website": "https://www.airbyte.io/", "ig_id": 8070063576, "followers_count": 1252, "name": "Jean Lafleur", "media_count": 258, "username": "airbytehq", "follows_count": 14, "biography": "Airbyte is the new open-source data integration platform that consolidates your data into your warehouses.", "profile_picture_url": "https://scontent-iev1-1.xx.fbcdn.net/v/t51.2885-15/153169696_890787328349641_8382928081987798464_n.jpg?_nc_cat=111&_nc_sid=7d201b&_nc_ohc=DFFn_25gYVMAX8nPfUd&_nc_ht=scontent-iev1-1.xx&edm=AL-3X8kEAAAA&oh=00_AfBHQPJ5aiFU1qw88d3gTF5jmg-Rpd5TX_gxAQt3jrSA4g&oe=655CCBBE", "page_id": "144706962067225"}, "emitted_at": 1700230802579} +{"stream": "media", "data": {"id": "17884386203808767", "caption": "Terraform Explained Part 1\n.\n.\n.\n#airbyte #dataengineering #tech #terraform #cloud #cloudengineer #coding #reels", "ig_id": "3123724930722523505", "media_url": "https://scontent-iev1-1.cdninstagram.com/o1/v/t16/f1/m82/B34BFFBB0614049AD69F066D153FDD8C_video_dashinit.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLmNsaXBzLnVua25vd24tQzMuNzIwLmRhc2hfYmFzZWxpbmVfMV92MSJ9&_nc_ht=scontent-iev1-1.cdninstagram.com&_nc_cat=107&vs=986202625710684_1200838240&_nc_vs=HBksFQIYT2lnX3hwdl9yZWVsc19wZXJtYW5lbnRfcHJvZC9CMzRCRkZCQjA2MTQwNDlBRDY5RjA2NkQxNTNGREQ4Q192aWRlb19kYXNoaW5pdC5tcDQVAALIAQAVAhg6cGFzc3Rocm91Z2hfZXZlcnN0b3JlL0dDQm9HQlV3a2JxUWwtY0JBRnZGTnFBUkdQeHpicV9FQUFBRhUCAsgBACgAGAAbAYgHdXNlX29pbAExFQAAJtDf4euHnbtAFQIoAkMzLBdAUBtDlYEGJRgSZGFzaF9iYXNlbGluZV8xX3YxEQB1AAA%3D&ccb=9-4&oh=00_AfBPpWnNa8TFbux-TpRO48bJGSkaIKPFOnmXhcv39jLd_A&oe=6559369A&_nc_sid=1d576d", "owner": {"id": "17841408147298757"}, "shortcode": "CtZs0Y3v2lx", "username": "airbytehq", "thumbnail_url": "https://scontent-iev1-1.cdninstagram.com/v/t51.36329-15/353022694_609901831117241_2447211336606431614_n.jpg?_nc_cat=100&ccb=1-7&_nc_sid=c4dd86&_nc_ohc=1ZTHPkRhzl8AX-hZcw_&_nc_ht=scontent-iev1-1.cdninstagram.com&edm=AM6HXa8EAAAA&oh=00_AfBdTKQTru0U2JNSqNnuPN0cWYv1u6o6t6u3EHIFteUV7w&oe=655C7D4E", "is_comment_enabled": true, "permalink": "https://www.instagram.com/reel/CtZs0Y3v2lx/", "timestamp": "2023-06-12T19:20:02+00:00", "like_count": 9, "comments_count": 2, "media_product_type": "REELS", "media_type": "VIDEO", "page_id": "144706962067225", "business_account_id": "17841408147298757"}, "emitted_at": 1700230757119} +{"stream": "media", "data": {"id": "17864256500936159", "caption": "When and why you should be using Rust for Data Engineering! \n\n#rust #airbyte #coding #programming #tech #dataengineering #data", "ig_id": "3106359072491902976", "media_url": "https://scontent-iev1-1.cdninstagram.com/o1/v/t16/f1/m82/BE4F848CC97FBA35A1AE1B1150B989A7_video_dashinit.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLmNsaXBzLnVua25vd24tQzMuNzIwLmRhc2hfYmFzZWxpbmVfMV92MSJ9&_nc_ht=scontent-iev1-1.cdninstagram.com&_nc_cat=110&vs=6290041361087047_1877877688&_nc_vs=HBksFQIYT2lnX3hwdl9yZWVsc19wZXJtYW5lbnRfcHJvZC9CRTRGODQ4Q0M5N0ZCQTM1QTFBRTFCMTE1MEI5ODlBN192aWRlb19kYXNoaW5pdC5tcDQVAALIAQAVAhg6cGFzc3Rocm91Z2hfZXZlcnN0b3JlL0dEaE94aFJJdk1BWGZaWURBQXQyS0FLWWxOSlhicV9FQUFBRhUCAsgBACgAGAAbAYgHdXNlX29pbAExFQAAJrD%2B6LaRwf1AFQIoAkMzLBdARDmZmZmZmhgSZGFzaF9iYXNlbGluZV8xX3YxEQB1AAA%3D&ccb=9-4&oh=00_AfC6GeTJWR8KJZ3-eb1-faBZ8P8G8AFyswEDdD4gFzmPMg&oe=65594B26&_nc_sid=1d576d", "owner": {"id": "17841408147298757"}, "shortcode": "CscAR5EsRgA", "username": "airbytehq", "thumbnail_url": "https://scontent-iev1-1.cdninstagram.com/v/t51.36329-15/347441626_604256678433845_716271787932876577_n.jpg?_nc_cat=108&ccb=1-7&_nc_sid=c4dd86&_nc_ohc=jLyY4sWj0v0AX-iadbF&_nc_ht=scontent-iev1-1.cdninstagram.com&edm=AM6HXa8EAAAA&oh=00_AfA-x6QyIXxT7o_lEwDH0k7tDb_bgCGeP61AseCpluCtPA&oe=655D3C59", "is_comment_enabled": true, "permalink": "https://www.instagram.com/reel/CscAR5EsRgA/", "timestamp": "2023-05-19T20:08:33+00:00", "like_count": 7, "comments_count": 0, "media_product_type": "REELS", "media_type": "VIDEO", "page_id": "144706962067225", "business_account_id": "17841408147298757"}, "emitted_at": 1700230757120} +{"stream": "media", "data": {"id": "17964324206288599", "caption": "We've all been there right? \ud83e\udd23\n\n#airbyte #data #dataengineering #datascience #dataanalytics #tech #softwareengineer", "ig_id": "3104241732634871967", "media_url": "https://scontent-iev1-1.cdninstagram.com/o1/v/t16/f1/m82/274503D36EA0F6E79A7CF3797A8D5985_video_dashinit.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLmNsaXBzLnVua25vd24tQzMuNTc2LmRhc2hfYmFzZWxpbmVfMV92MSJ9&_nc_ht=scontent-iev1-1.cdninstagram.com&_nc_cat=106&vs=1336282350269744_3931649106&_nc_vs=HBksFQIYT2lnX3hwdl9yZWVsc19wZXJtYW5lbnRfcHJvZC8yNzQ1MDNEMzZFQTBGNkU3OUE3Q0YzNzk3QThENTk4NV92aWRlb19kYXNoaW5pdC5tcDQVAALIAQAVAhg6cGFzc3Rocm91Z2hfZXZlcnN0b3JlL0dQdzNzaFRId3VlSlBFWURBSDFmTjUzcUNhd0JicV9FQUFBRhUCAsgBACgAGAAbAYgHdXNlX29pbAExFQAAJrDwmtqO44lAFQIoAkMzLBdAIewIMSbpeRgSZGFzaF9iYXNlbGluZV8xX3YxEQB1AAA%3D&ccb=9-4&oh=00_AfACHaQfoSJ_vMXbm4Xw3gmWnG_vnJgUsIYUePDdtIUS-w&oe=6558DBB2&_nc_sid=1d576d", "owner": {"id": "17841408147298757"}, "shortcode": "CsUe2iqpQif", "username": "airbytehq", "thumbnail_url": "https://scontent-iev1-1.cdninstagram.com/v/t51.36329-15/347429218_1848940842145573_5975413208994727174_n.jpg?_nc_cat=101&ccb=1-7&_nc_sid=c4dd86&_nc_ohc=Y6VzeGH_9lkAX_wkzpd&_nc_ht=scontent-iev1-1.cdninstagram.com&edm=AM6HXa8EAAAA&oh=00_AfDil0e2W7Iqq0-d7rf9JkdOluS7U2C3nhK17EfQ3c07fw&oe=655D28FC", "is_comment_enabled": true, "permalink": "https://www.instagram.com/reel/CsUe2iqpQif/", "timestamp": "2023-05-16T22:01:45+00:00", "like_count": 13, "comments_count": 0, "media_product_type": "REELS", "media_type": "VIDEO", "page_id": "144706962067225", "business_account_id": "17841408147298757"}, "emitted_at": 1700230757120} +{"stream": "user_lifetime_insights", "data": {"page_id": "144706962067225", "business_account_id": "17841408147298757", "metric": "audience_city", "date": "2023-11-17T08:00:00+00:00", "value": {"London, England": 7, "Sydney, New South Wales": 19, "Atlanta, Georgia": 4, "Algiers, Algiers Province": 4, "Caracas, Capital District": 4, "S\u00e3o Paulo, S\u00e3o Paulo (state)": 14, "Rio de Janeiro, Rio de Janeiro (state)": 5, "Hong Kong, Hong Kong": 4, "Berlin, Berlin": 8, "Kolkata, West Bengal": 5, "Tulsa, Oklahoma": 4, "Lagos, Lagos State": 16, "Dili, Timor-Leste": 3, "Ahmedabad, Gujarat": 4, "Skopje, Municipality of Centar (Skopje)": 4, "Moscow, Moscow": 5, "Karachi, Sindh": 4, "Bogot\u00e1, Distrito Especial": 5, "Dar es Salaam, Dar es Salaam": 7, "Jakarta, Jakarta": 10, "Accra, Greater Accra Region": 4, "Buenos Aires, Ciudad Aut\u00f3noma de Buenos Aires": 9, "Melbourne, Victoria": 7, "Gurugram, Haryana": 6, "Delhi, Delhi": 6, "Kuala Lumpur, Kuala Lumpur": 4, "Los Angeles, California": 5, "Lima, Lima Region": 4, "Istanbul, Istanbul Province": 9, "Chennai, Tamil Nadu": 6, "Abuja, Federal Capital Territory": 7, "Bangkok, Bangkok": 5, "Mexico City, Distrito Federal": 7, "Cape Town, Western Cape": 5, "San Francisco, California": 6, "Tehran, Tehran Province": 4, "New York, New York": 14, "Cairo, Cairo Governorate": 4, "Santiago, Santiago Metropolitan Region": 6, "Dubai, Dubai": 8, "Mumbai, Maharashtra": 8, "Bangalore, Karnataka": 18, "Singapore, Singapore": 6, "Hyderabad, Telangana": 7, "San Diego, California": 6}}, "emitted_at": 1700230802791} +{"stream": "user_lifetime_insights", "data": {"page_id": "144706962067225", "business_account_id": "17841408147298757", "metric": "audience_country", "date": "2023-11-17T08:00:00+00:00", "value": {"DE": 31, "HK": 4, "TW": 5, "FI": 5, "RU": 9, "TZ": 8, "FR": 10, "SA": 8, "BR": 64, "SE": 6, "SG": 6, "MA": 6, "DZ": 6, "ID": 29, "GB": 45, "CA": 24, "US": 264, "GH": 4, "EG": 10, "AE": 9, "CH": 7, "IN": 125, "ZA": 16, "IQ": 6, "CL": 9, "IR": 12, "GR": 6, "IT": 19, "MX": 24, "MY": 9, "CO": 11, "ES": 13, "VE": 9, "AR": 23, "AT": 4, "TH": 7, "AU": 35, "PE": 4, "PH": 7, "NG": 30, "TN": 6, "PK": 10, "PL": 5, "TR": 10, "NL": 13}}, "emitted_at": 1700230802792} +{"stream": "user_lifetime_insights", "data": {"page_id": "144706962067225", "business_account_id": "17841408147298757", "metric": "audience_gender_age", "date": "2023-11-17T08:00:00+00:00", "value": {"F.18-24": 11, "F.25-34": 75, "F.35-44": 72, "F.45-54": 17, "F.55-64": 1, "F.65+": 2, "M.13-17": 2, "M.18-24": 50, "M.25-34": 365, "M.35-44": 228, "M.45-54": 83, "M.55-64": 20, "M.65+": 12, "U.18-24": 18, "U.25-34": 67, "U.35-44": 42, "U.45-54": 19, "U.55-64": 5}}, "emitted_at": 1700230802792} diff --git a/airbyte-integrations/connectors/source-instagram/metadata.yaml b/airbyte-integrations/connectors/source-instagram/metadata.yaml index 48c76e1a70d2..13fb9d4bc74a 100644 --- a/airbyte-integrations/connectors/source-instagram/metadata.yaml +++ b/airbyte-integrations/connectors/source-instagram/metadata.yaml @@ -7,7 +7,7 @@ data: connectorSubtype: api connectorType: source definitionId: 6acf6b55-4f1e-4fca-944e-1a3caef8aba8 - dockerImageTag: 1.0.16 + dockerImageTag: 2.0.0 dockerRepository: airbyte/source-instagram githubIssueLabel: source-instagram icon: instagram.svg @@ -19,6 +19,13 @@ data: oss: enabled: true releaseStage: generally_available + releases: + breakingChanges: + 2.0.0: + message: + This release introduces a default primary key for the streams UserLifetimeInsights and UserInsights. + Additionally, the format of timestamp fields has been updated in the UserLifetimeInsights, UserInsights, Media and Stories streams to include timezone information. + upgradeDeadline: "2023-12-03" suggestedStreams: streams: - media diff --git a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/media.json b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/media.json index 4185de5f66cb..03c77796f5a0 100644 --- a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/media.json +++ b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/media.json @@ -53,7 +53,8 @@ }, "timestamp": { "type": ["null", "string"], - "format": "date-time" + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" }, "username": { "type": ["null", "string"] @@ -94,7 +95,8 @@ }, "timestamp": { "type": ["null", "string"], - "format": "date-time" + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" }, "username": { "type": ["null", "string"] diff --git a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/stories.json b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/stories.json index 7fc7fa7b40a3..876edf95ea41 100644 --- a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/stories.json +++ b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/stories.json @@ -47,7 +47,8 @@ }, "timestamp": { "type": ["null", "string"], - "format": "date-time" + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" }, "username": { "type": ["null", "string"] diff --git a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_insights.json b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_insights.json index fe98eafcccbf..91bc309d8eb6 100644 --- a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_insights.json +++ b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_insights.json @@ -9,7 +9,8 @@ }, "date": { "type": ["null", "string"], - "format": "date-time" + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" }, "follower_count": { "type": ["null", "integer"] diff --git a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_lifetime_insights.json b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_lifetime_insights.json index eb9bb57fc720..4cb5092f5ace 100644 --- a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_lifetime_insights.json +++ b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_lifetime_insights.json @@ -9,7 +9,8 @@ }, "date": { "type": ["null", "string"], - "format": "date-time" + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" }, "metric": { "type": ["null", "string"] diff --git a/airbyte-integrations/connectors/source-instagram/source_instagram/streams.py b/airbyte-integrations/connectors/source-instagram/source_instagram/streams.py index 9c07d98bb083..bf5d39de1e1c 100644 --- a/airbyte-integrations/connectors/source-instagram/source_instagram/streams.py +++ b/airbyte-integrations/connectors/source-instagram/source_instagram/streams.py @@ -11,6 +11,7 @@ import pendulum from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.streams import IncrementalMixin, Stream +from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer from cached_property import cached_property from facebook_business.adobjects.igmedia import IGMedia from facebook_business.exceptions import FacebookRequestError @@ -19,6 +20,24 @@ from .common import remove_params_from_url +class DatetimeTransformerMixin: + transformer: TypeTransformer = TypeTransformer(TransformConfig.CustomSchemaNormalization) + + @staticmethod + @transformer.registerCustomTransform + def custom_transform_datetime_rfc3339(original_value, field_schema): + """ + Transform datetime string to RFC 3339 format + """ + if original_value and field_schema.get("format") == "date-time" and field_schema.get("airbyte_type") == "timestamp_with_timezone": + # Parse the ISO format timestamp + dt = pendulum.parse(original_value) + + # Convert to RFC 3339 format + return dt.to_rfc3339_string() + return original_value + + class InstagramStream(Stream, ABC): """Base stream class""" @@ -121,10 +140,10 @@ def read_records( yield self.transform(record) -class UserLifetimeInsights(InstagramStream): +class UserLifetimeInsights(DatetimeTransformerMixin, InstagramStream): """Docs: https://developers.facebook.com/docs/instagram-api/reference/ig-user/insights""" - primary_key = None + primary_key = ["business_account_id", "metric", "date"] LIFETIME_METRICS = ["audience_city", "audience_country", "audience_gender_age", "audience_locale"] period = "lifetime" @@ -156,7 +175,7 @@ def request_params( return params -class UserInsights(InstagramIncrementalStream): +class UserInsights(DatetimeTransformerMixin, InstagramIncrementalStream): """Docs: https://developers.facebook.com/docs/instagram-api/reference/ig-user/insights""" METRICS_BY_PERIOD = { @@ -176,7 +195,7 @@ class UserInsights(InstagramIncrementalStream): "lifetime": ["online_followers"], } - primary_key = None + primary_key = ["business_account_id", "date"] cursor_field = "date" # For some metrics we can only get insights not older than 30 days, it is Facebook policy @@ -295,7 +314,7 @@ def _state_has_legacy_format(self, state: Mapping[str, Any]) -> bool: return False -class Media(InstagramStream): +class Media(DatetimeTransformerMixin, InstagramStream): """Children objects can only be of the media_type == "CAROUSEL_ALBUM". And children object does not support INVALID_CHILDREN_FIELDS fields, so they are excluded when trying to get child objects to avoid the error @@ -403,7 +422,7 @@ def _get_insights(self, item, account_id) -> Optional[MutableMapping[str, Any]]: raise error -class Stories(InstagramStream): +class Stories(DatetimeTransformerMixin, InstagramStream): """Docs: https://developers.facebook.com/docs/instagram-api/reference/ig-user/stories""" def read_records( diff --git a/airbyte-integrations/connectors/source-instagram/unit_tests/conftest.py b/airbyte-integrations/connectors/source-instagram/unit_tests/conftest.py index 7b9fd1db150e..a065d01b77cf 100644 --- a/airbyte-integrations/connectors/source-instagram/unit_tests/conftest.py +++ b/airbyte-integrations/connectors/source-instagram/unit_tests/conftest.py @@ -35,6 +35,7 @@ def config_fixture(): def some_config_fixture(account_id): return {"start_date": "2021-01-23T00:00:00Z", "access_token": "unknown_token"} + @fixture(scope="session", name="some_config_future_date") def some_config_future_date_fixture(account_id): return {"start_date": "2030-01-23T00:00:00Z", "access_token": "unknown_token"} diff --git a/airbyte-integrations/connectors/source-instagram/unit_tests/test_source.py b/airbyte-integrations/connectors/source-instagram/unit_tests/test_source.py index 2cdca11b4f8f..add26ad1a33f 100644 --- a/airbyte-integrations/connectors/source-instagram/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-instagram/unit_tests/test_source.py @@ -31,11 +31,14 @@ def test_check_connection_empty_config(api): assert not ok assert error_msg + def test_check_connection_invalid_config_future_date(api, some_config_future_date): ok, error_msg = SourceInstagram().check_connection(logger, config=some_config_future_date) assert not ok assert error_msg + + def test_check_connection_no_date_config(api, some_config): some_config.pop("start_date") ok, error_msg = SourceInstagram().check_connection(logger, config=some_config) diff --git a/airbyte-integrations/connectors/source-instagram/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-instagram/unit_tests/test_streams.py index 39fc889e7e48..19470cb9c22b 100644 --- a/airbyte-integrations/connectors/source-instagram/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-instagram/unit_tests/test_streams.py @@ -9,6 +9,7 @@ from airbyte_cdk.models import SyncMode from facebook_business import FacebookAdsApi, FacebookSession from source_instagram.streams import ( + DatetimeTransformerMixin, InstagramStream, Media, MediaInsights, @@ -32,15 +33,11 @@ def test_clear_url(config): def test_state_outdated(api, config): - assert UserInsights(api=api, start_date=config["start_date"])._state_has_legacy_format( - {"state": MagicMock()} - ) + assert UserInsights(api=api, start_date=config["start_date"])._state_has_legacy_format({"state": MagicMock()}) def test_state_is_not_outdated(api, config): - assert not UserInsights(api=api, start_date=config["start_date"])._state_has_legacy_format( - {"state": {}} - ) + assert not UserInsights(api=api, start_date=config["start_date"])._state_has_legacy_format({"state": {}}) def test_media_get_children(api, requests_mock, some_config): @@ -208,9 +205,9 @@ def test_user_lifetime_insights_read(api, config, user_insight_data, requests_mo @pytest.mark.parametrize( "values,expected", [ - ({"end_time": "test_end_time", "value": "test_value"}, {"date": "test_end_time", "value": "test_value"}), + ({"end_time": "2020-05-04T07:00:00+0000", "value": "test_value"}, {"date": "2020-05-04T07:00:00+0000", "value": "test_value"}), ({"value": "test_value"}, {"date": None, "value": "test_value"}), - ({"end_time": "test_end_time"}, {"date": "test_end_time", "value": None}), + ({"end_time": "2020-05-04T07:00:00+0000"}, {"date": "2020-05-04T07:00:00+0000", "value": None}), ({}, {"date": None, "value": None}), ], ids=[ @@ -363,3 +360,22 @@ def test_exit_gracefully(api, config, requests_mock, caplog): assert not records assert requests_mock.call_count == 6 # 4 * 1 per `metric_to_period` map + 1 `summary` request + 1 `business_account_id` request assert "Stopping syncing stream 'user_insights'" in caplog.text + + +@pytest.mark.parametrize( + "original_value, field_schema, expected", + [ + ("2020-01-01T12:00:00Z", {"format": "date-time", "airbyte_type": "timestamp_with_timezone"}, "2020-01-01T12:00:00+00:00"), + ("2020-05-04T07:00:00+0000", {"format": "date-time", "airbyte_type": "timestamp_with_timezone"}, "2020-05-04T07:00:00+00:00"), + (None, {"format": "date-time", "airbyte_type": "timestamp_with_timezone"}, None), + ("2020-01-01T12:00:00", {"format": "date-time", "airbyte_type": "timestamp_without_timezone"}, "2020-01-01T12:00:00"), + ("2020-01-01T14:00:00", {"format": "date-time"}, "2020-01-01T14:00:00"), + ("2020-02-03T12:00:00", {"type": "string"}, "2020-02-03T12:00:00"), + ], +) +def test_custom_transform_datetime_rfc3339(original_value, field_schema, expected): + # Call the static method + result = DatetimeTransformerMixin.custom_transform_datetime_rfc3339(original_value, field_schema) + + # Assert the result matches the expected output + assert result == expected diff --git a/docs/integrations/sources/instagram-migrations.md b/docs/integrations/sources/instagram-migrations.md new file mode 100644 index 000000000000..f9009b09e3b5 --- /dev/null +++ b/docs/integrations/sources/instagram-migrations.md @@ -0,0 +1,9 @@ +# Instagram Migration Guide + +## Upgrading to 2.0.0 + +This release adds a default primary key for the streams UserLifetimeInsights and UserInsights, and updates the format of timestamp fields in the UserLifetimeInsights, UserInsights, Media and Stories streams to include timezone information. + +To ensure uninterrupted syncs, users should: +- Refresh the source schema +- Reset affected streams \ No newline at end of file diff --git a/docs/integrations/sources/instagram.md b/docs/integrations/sources/instagram.md index 307acbd686ac..7b4999945fd4 100644 --- a/docs/integrations/sources/instagram.md +++ b/docs/integrations/sources/instagram.md @@ -93,6 +93,7 @@ AirbyteRecords are required to conform to the [Airbyte type](https://docs.airbyt | Version | Date | Pull Request | Subject | |:--------|:-----------|:---------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------| +| 2.0.0 | 2023-11-17 | [32500](https://github.com/airbytehq/airbyte/pull/32500) | Add primary keys for UserLifetimeInsights and UserInsights; add airbyte_type to timestamp fields | | 1.0.16 | 2023-11-17 | [32627](https://github.com/airbytehq/airbyte/pull/32627) | Fix start_date type; fix docs | | 1.0.15 | 2023-11-14 | [32494](https://github.com/airbytehq/airbyte/pull/32494) | Marked start_date as optional; set max retry time to 10 minutes; add suggested streams | | 1.0.14 | 2023-11-13 | [32423](https://github.com/airbytehq/airbyte/pull/32423) | Capture media_product_type column in media and stories stream |