From 1fdac5214cdab0d763eb0ea18113727c8705fe88 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Mon, 20 Nov 2023 12:47:10 +0200 Subject: [PATCH 1/3] Fix max waiting time for Reports streams --- .../source_pinterest/reports/reports.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/airbyte-integrations/connectors/source-pinterest/source_pinterest/reports/reports.py b/airbyte-integrations/connectors/source-pinterest/source_pinterest/reports/reports.py index 04e85473ff21..81babbc87a3c 100644 --- a/airbyte-integrations/connectors/source-pinterest/source_pinterest/reports/reports.py +++ b/airbyte-integrations/connectors/source-pinterest/source_pinterest/reports/reports.py @@ -26,7 +26,7 @@ class PinterestAnalyticsReportStream(PinterestAnalyticsStream): Details - https://developers.pinterest.com/docs/api/v5/#operation/analytics/create_report""" http_method = "POST" - report_wait_timeout = 180 + report_wait_timeout = 60 * 10 report_generation_maximum_retries = 5 @property @@ -69,7 +69,7 @@ def request_params( def backoff_max_time(func): def wrapped(self, *args, **kwargs): - return backoff.on_exception(backoff.constant, RetryableException, max_time=self.report_wait_timeout * 60, interval=10)(func)( + return backoff.on_exception(backoff.constant, RetryableException, max_time=self.report_wait_timeout, interval=10)(func)( self, *args, **kwargs ) @@ -77,9 +77,9 @@ def wrapped(self, *args, **kwargs): def backoff_max_tries(func): def wrapped(self, *args, **kwargs): - return backoff.on_exception(backoff.expo, ReportGenerationFailure, max_tries=self.report_generation_maximum_retries)(func)( - self, *args, **kwargs - ) + return backoff.on_exception( + backoff.expo, ReportGenerationFailure, max_tries=self.report_generation_maximum_retries, max_time=self.report_wait_timeout + )(func)(self, *args, **kwargs) return wrapped From 19f81a239cf7a5e5de4399701e9f917ff501f74c Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Mon, 20 Nov 2023 12:49:28 +0200 Subject: [PATCH 2/3] Add raising error if waiting time is longer than 10 minutes --- .../source-pinterest/source_pinterest/streams.py | 11 ++++++++++- .../source-pinterest/unit_tests/test_source.py | 14 ++++++-------- .../source-pinterest/unit_tests/test_streams.py | 16 ++++++++++++++-- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/airbyte-integrations/connectors/source-pinterest/source_pinterest/streams.py b/airbyte-integrations/connectors/source-pinterest/source_pinterest/streams.py index 0f30a44e093a..ab36624e6424 100644 --- a/airbyte-integrations/connectors/source-pinterest/source_pinterest/streams.py +++ b/airbyte-integrations/connectors/source-pinterest/source_pinterest/streams.py @@ -29,6 +29,10 @@ class NonJSONResponse(Exception): pass +class RateLimitExceeded(Exception): + pass + + class PinterestStream(HttpStream, ABC): url_base = "https://api.pinterest.com/v5/" primary_key = "id" @@ -90,7 +94,12 @@ def should_retry(self, response: requests.Response) -> bool: def backoff_time(self, response: requests.Response) -> Optional[float]: if response.status_code == requests.codes.too_many_requests: self.logger.error(f"For stream {self.name} rate limit exceeded.") - return float(response.headers.get("X-RateLimit-Reset", 0)) + sleep_time = float(response.headers.get("X-RateLimit-Reset", 0)) + if sleep_time > 600: + raise RateLimitExceeded( + f"Rate limit exceeded for stream {self.name}. Waiting time is longer than 10 minutes: {sleep_time}s." + ) + return sleep_time class PinterestSubStream(HttpSubStream): diff --git a/airbyte-integrations/connectors/source-pinterest/unit_tests/test_source.py b/airbyte-integrations/connectors/source-pinterest/unit_tests/test_source.py index 2fd50933d8e7..615e0aa9c109 100644 --- a/airbyte-integrations/connectors/source-pinterest/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-pinterest/unit_tests/test_source.py @@ -41,16 +41,14 @@ def test_check_wrong_date_connection(wrong_date_config): @responses.activate def test_check_connection_expired_token(test_config): - responses.add( - responses.POST, - "https://api.pinterest.com/v5/oauth/token", - status=401 - ) + responses.add(responses.POST, "https://api.pinterest.com/v5/oauth/token", status=401) source = SourcePinterest() logger_mock = MagicMock() - assert source.check_connection(logger_mock, test_config) == (False, - 'Try to re-authenticate because current refresh token is not valid. ' \ - '401 Client Error: Unauthorized for url: https://api.pinterest.com/v5/oauth/token') + assert source.check_connection(logger_mock, test_config) == ( + False, + "Try to re-authenticate because current refresh token is not valid. " + "401 Client Error: Unauthorized for url: https://api.pinterest.com/v5/oauth/token", + ) def test_get_authenticator(test_config): diff --git a/airbyte-integrations/connectors/source-pinterest/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-pinterest/unit_tests/test_streams.py index a560986ec482..bca080a29216 100644 --- a/airbyte-integrations/connectors/source-pinterest/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-pinterest/unit_tests/test_streams.py @@ -30,6 +30,7 @@ Keywords, PinterestStream, PinterestSubStream, + RateLimitExceeded, UserAccountAnalytics, ) @@ -148,6 +149,12 @@ def test_non_json_response(requests_mock): "test_response, test_headers, status_code, expected", [ ({"code": 7, "message": "Some other error message"}, {"X-RateLimit-Reset": "2"}, 429, 2.0), + ( + {"code": 7, "message": "Some other error message"}, + {"X-RateLimit-Reset": "2000"}, + 429, + (RateLimitExceeded, "Rate limit exceeded for stream boards. Waiting time is longer than 10 minutes: 2000.0s."), + ), ], ) def test_backoff_on_rate_limit_error(requests_mock, test_response, status_code, test_headers, expected): @@ -161,8 +168,13 @@ def test_backoff_on_rate_limit_error(requests_mock, test_response, status_code, ) response = requests.get(url) - result = stream.backoff_time(response) - assert result == expected + + if isinstance(expected, tuple): + with pytest.raises(expected[0], match=expected[1]): + stream.backoff_time(response) + else: + result = stream.backoff_time(response) + assert result == expected @pytest.mark.parametrize( From ffea3102183e7dbe96cf887e806738ce9c4cccee Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Mon, 20 Nov 2023 13:50:13 +0200 Subject: [PATCH 3/3] Update version --- .../connectors/source-pinterest/metadata.yaml | 2 +- docs/integrations/sources/pinterest.md | 53 ++++++++++--------- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/airbyte-integrations/connectors/source-pinterest/metadata.yaml b/airbyte-integrations/connectors/source-pinterest/metadata.yaml index a0da1eb5a704..7ca704836e25 100644 --- a/airbyte-integrations/connectors/source-pinterest/metadata.yaml +++ b/airbyte-integrations/connectors/source-pinterest/metadata.yaml @@ -5,7 +5,7 @@ data: connectorSubtype: api connectorType: source definitionId: 5cb7e5fe-38c2-11ec-8d3d-0242ac130003 - dockerImageTag: 0.8.1 + dockerImageTag: 0.8.2 dockerRepository: airbyte/source-pinterest connectorBuildOptions: baseImage: docker.io/airbyte/python-connector-base:1.1.0@sha256:bd98f6505c6764b1b5f99d3aedc23dfc9e9af631a62533f60eb32b1d3dbab20c diff --git a/docs/integrations/sources/pinterest.md b/docs/integrations/sources/pinterest.md index 1aae30167248..a1744e8a2b06 100644 --- a/docs/integrations/sources/pinterest.md +++ b/docs/integrations/sources/pinterest.md @@ -86,32 +86,33 @@ The connector is restricted by the Pinterest [requests limitation](https://devel | Version | Date | Pull Request | Subject | |:--------|:-----------| :------------------------------------------------------- |:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 0.8.2 | 2023-11-20 | [32672](https://github.com/airbytehq/airbyte/pull/32672) | Fix backoff waiting time | | 0.8.1 | 2023-11-16 | [32601](https://github.com/airbytehq/airbyte/pull/32601) | added ability to create custom reports | | 0.8.0 | 2023-11-16 | [32592](https://github.com/airbytehq/airbyte/pull/32592) | Make start_date optional; add suggested streams; add missing fields | | 0.7.2 | 2023-11-08 | [32299](https://github.com/airbytehq/airbyte/pull/32299) | added default `AvailabilityStrategy`, fixed bug which cases duplicated requests, added new streams: Catalogs, CatalogsFeeds, CatalogsProductGroups, Audiences, Keywords, ConversionTags, CustomerLists, CampaignTargetingReport, AdvertizerReport, AdvertizerTargetingReport, AdGroupReport, AdGroupTargetingReport, PinPromotionReport, PinPromotionTargetingReport, ProductGroupReport, ProductGroupTargetingReport, ProductItemReport, KeywordReport | -| 0.7.1 | 2023-11-01 | [32078](https://github.com/airbytehq/airbyte/pull/32078) | handle non json response | -| 0.7.0 | 2023-10-25 | [31876](https://github.com/airbytehq/airbyte/pull/31876) | Migrated to base image, removed token based authentication mthod becuase access_token is valid for 1 day only | -| 0.6.0 | 2023-07-25 | [28672](https://github.com/airbytehq/airbyte/pull/28672) | Add report stream for `CAMPAIGN` level | -| 0.5.3 | 2023-07-05 | [27964](https://github.com/airbytehq/airbyte/pull/27964) | Add `id` field to `owner` field in `ad_accounts` stream | -| 0.5.2 | 2023-06-02 | [26949](https://github.com/airbytehq/airbyte/pull/26949) | Update `BoardPins` stream with `note` property | -| 0.5.1 | 2023-05-11 | [25984](https://github.com/airbytehq/airbyte/pull/25984) | Add pattern for start_date | -| 0.5.0 | 2023-05-17 | [26188](https://github.com/airbytehq/airbyte/pull/26188) | Add `product_tags` field to the `BoardPins` stream | -| 0.4.0 | 2023-05-16 | [26112](https://github.com/airbytehq/airbyte/pull/26112) | Add `is_standard` field to the `BoardPins` stream | -| 0.3.0 | 2023-05-09 | [25915](https://github.com/airbytehq/airbyte/pull/25915) | Add `creative_type` field to the `BoardPins` stream | -| 0.2.6 | 2023-04-26 | [25548](https://github.com/airbytehq/airbyte/pull/25548) | Fix `format` issue for `boards` stream schema for fields with `date-time` | -| 0.2.5 | 2023-04-19 | [00000](https://github.com/airbytehq/airbyte/pull/00000) | Update `AMOUNT_OF_DAYS_ALLOWED_FOR_LOOKUP` to 89 days | -| 0.2.4 | 2023-02-25 | [23457](https://github.com/airbytehq/airbyte/pull/23457) | Add missing columns for analytics streams for pinterest source | -| 0.2.3 | 2023-03-01 | [23649](https://github.com/airbytehq/airbyte/pull/23649) | Fix for `HTTP - 400 Bad Request` when requesting data >= 90 days | -| 0.2.2 | 2023-01-27 | [22020](https://github.com/airbytehq/airbyte/pull/22020) | Set `AvailabilityStrategy` for streams explicitly to `None` | -| 0.2.1 | 2022-12-15 | [20532](https://github.com/airbytehq/airbyte/pull/20532) | Bump CDK version | -| 0.2.0 | 2022-12-13 | [20242](https://github.com/airbytehq/airbyte/pull/20242) | Add data-type normalization up to the schemas declared | -| 0.1.9 | 2022-09-06 | [15074](https://github.com/airbytehq/airbyte/pull/15074) | Add filter based on statuses | -| 0.1.8 | 2022-10-21 | [18285](https://github.com/airbytehq/airbyte/pull/18285) | Fix type of `start_date` | -| 0.1.7 | 2022-09-29 | [17387](https://github.com/airbytehq/airbyte/pull/17387) | Set `start_date` dynamically based on API restrictions. | -| 0.1.6 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Use CDK 0.1.89 | -| 0.1.5 | 2022-09-16 | [16799](https://github.com/airbytehq/airbyte/pull/16799) | Migrate to per-stream state | -| 0.1.4 | 2022-09-06 | [16161](https://github.com/airbytehq/airbyte/pull/16161) | Add ability to handle `429 - Too Many Requests` error with respect to `Max Rate Limit Exceeded Error` | -| 0.1.3 | 2022-09-02 | [16271](https://github.com/airbytehq/airbyte/pull/16271) | Add support of `OAuth2.0` authentication method | -| 0.1.2 | 2021-12-22 | [10223](https://github.com/airbytehq/airbyte/pull/10223) | Fix naming of `AD_ID` and `AD_ACCOUNT_ID` fields | -| 0.1.1 | 2021-12-22 | [9043](https://github.com/airbytehq/airbyte/pull/9043) | Update connector fields title/description | -| 0.1.0 | 2021-10-29 | [7493](https://github.com/airbytehq/airbyte/pull/7493) | Release Pinterest CDK Connector | +| 0.7.1 | 2023-11-01 | [32078](https://github.com/airbytehq/airbyte/pull/32078) | handle non json response | +| 0.7.0 | 2023-10-25 | [31876](https://github.com/airbytehq/airbyte/pull/31876) | Migrated to base image, removed token based authentication mthod becuase access_token is valid for 1 day only | +| 0.6.0 | 2023-07-25 | [28672](https://github.com/airbytehq/airbyte/pull/28672) | Add report stream for `CAMPAIGN` level | +| 0.5.3 | 2023-07-05 | [27964](https://github.com/airbytehq/airbyte/pull/27964) | Add `id` field to `owner` field in `ad_accounts` stream | +| 0.5.2 | 2023-06-02 | [26949](https://github.com/airbytehq/airbyte/pull/26949) | Update `BoardPins` stream with `note` property | +| 0.5.1 | 2023-05-11 | [25984](https://github.com/airbytehq/airbyte/pull/25984) | Add pattern for start_date | +| 0.5.0 | 2023-05-17 | [26188](https://github.com/airbytehq/airbyte/pull/26188) | Add `product_tags` field to the `BoardPins` stream | +| 0.4.0 | 2023-05-16 | [26112](https://github.com/airbytehq/airbyte/pull/26112) | Add `is_standard` field to the `BoardPins` stream | +| 0.3.0 | 2023-05-09 | [25915](https://github.com/airbytehq/airbyte/pull/25915) | Add `creative_type` field to the `BoardPins` stream | +| 0.2.6 | 2023-04-26 | [25548](https://github.com/airbytehq/airbyte/pull/25548) | Fix `format` issue for `boards` stream schema for fields with `date-time` | +| 0.2.5 | 2023-04-19 | [00000](https://github.com/airbytehq/airbyte/pull/00000) | Update `AMOUNT_OF_DAYS_ALLOWED_FOR_LOOKUP` to 89 days | +| 0.2.4 | 2023-02-25 | [23457](https://github.com/airbytehq/airbyte/pull/23457) | Add missing columns for analytics streams for pinterest source | +| 0.2.3 | 2023-03-01 | [23649](https://github.com/airbytehq/airbyte/pull/23649) | Fix for `HTTP - 400 Bad Request` when requesting data >= 90 days | +| 0.2.2 | 2023-01-27 | [22020](https://github.com/airbytehq/airbyte/pull/22020) | Set `AvailabilityStrategy` for streams explicitly to `None` | +| 0.2.1 | 2022-12-15 | [20532](https://github.com/airbytehq/airbyte/pull/20532) | Bump CDK version | +| 0.2.0 | 2022-12-13 | [20242](https://github.com/airbytehq/airbyte/pull/20242) | Add data-type normalization up to the schemas declared | +| 0.1.9 | 2022-09-06 | [15074](https://github.com/airbytehq/airbyte/pull/15074) | Add filter based on statuses | +| 0.1.8 | 2022-10-21 | [18285](https://github.com/airbytehq/airbyte/pull/18285) | Fix type of `start_date` | +| 0.1.7 | 2022-09-29 | [17387](https://github.com/airbytehq/airbyte/pull/17387) | Set `start_date` dynamically based on API restrictions. | +| 0.1.6 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Use CDK 0.1.89 | +| 0.1.5 | 2022-09-16 | [16799](https://github.com/airbytehq/airbyte/pull/16799) | Migrate to per-stream state | +| 0.1.4 | 2022-09-06 | [16161](https://github.com/airbytehq/airbyte/pull/16161) | Add ability to handle `429 - Too Many Requests` error with respect to `Max Rate Limit Exceeded Error` | +| 0.1.3 | 2022-09-02 | [16271](https://github.com/airbytehq/airbyte/pull/16271) | Add support of `OAuth2.0` authentication method | +| 0.1.2 | 2021-12-22 | [10223](https://github.com/airbytehq/airbyte/pull/10223) | Fix naming of `AD_ID` and `AD_ACCOUNT_ID` fields | +| 0.1.1 | 2021-12-22 | [9043](https://github.com/airbytehq/airbyte/pull/9043) | Update connector fields title/description | +| 0.1.0 | 2021-10-29 | [7493](https://github.com/airbytehq/airbyte/pull/7493) | Release Pinterest CDK Connector |