From 0c8c6666ba96a8783cba6acb72fb3a1a83cb2835 Mon Sep 17 00:00:00 2001 From: Bryant Gray Date: Tue, 17 Oct 2023 17:31:32 +0000 Subject: [PATCH 1/3] Get page_size from config or use default --- tap_zendesk/http.py | 8 ++++---- tap_zendesk/streams.py | 16 ++++++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/tap_zendesk/http.py b/tap_zendesk/http.py index 3f44006..62e3028 100644 --- a/tap_zendesk/http.py +++ b/tap_zendesk/http.py @@ -149,7 +149,7 @@ def call_api(url, request_timeout, params, headers): raise_for_error(response) return response -def get_cursor_based(url, access_token, request_timeout, cursor=None, **kwargs): +def get_cursor_based(url, access_token, request_timeout, page_size, cursor=None, **kwargs): headers = { 'Content-Type': 'application/json', 'Accept': 'application/json', @@ -158,7 +158,7 @@ def get_cursor_based(url, access_token, request_timeout, cursor=None, **kwargs): } params = { - 'page[size]': 100, + 'page[size]': page_size, **kwargs.get('params', {}) } @@ -181,7 +181,7 @@ def get_cursor_based(url, access_token, request_timeout, cursor=None, **kwargs): yield response_json has_more = response_json['meta']['has_more'] -def get_offset_based(url, access_token, request_timeout, **kwargs): +def get_offset_based(url, access_token, request_timeout, page_size, **kwargs): headers = { 'Content-Type': 'application/json', 'Accept': 'application/json', @@ -190,7 +190,7 @@ def get_offset_based(url, access_token, request_timeout, **kwargs): } params = { - 'per_page': 100, + 'per_page': page_size, **kwargs.get('params', {}) } diff --git a/tap_zendesk/streams.py b/tap_zendesk/streams.py index ecf47ac..e33157a 100644 --- a/tap_zendesk/streams.py +++ b/tap_zendesk/streams.py @@ -14,6 +14,7 @@ LOGGER = singer.get_logger() KEY_PROPERTIES = ['id'] +DEFAULT_PAGE_SIZE = 100 REQUEST_TIMEOUT = 300 START_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ" HEADERS = { @@ -58,6 +59,7 @@ class Stream(): stream = None endpoint = None request_timeout = None + page_size = None def __init__(self, client=None, config=None): self.client = client @@ -73,6 +75,12 @@ def __init__(self, client=None, config=None): if config.get('search_window_size') and int(config.get('search_window_size')) < 2: raise ValueError('Search window size cannot be less than 2') + config_page_size = self.config.get('page_size') + if config_page_size and 1 <= int(config_page_size) <= 1000: # Zendesk's max page size + self.page_size = int(config_page_size) + else: + self.page_size = DEFAULT_PAGE_SIZE + def get_bookmark(self, state): return utils.strptime_with_tz(singer.get_bookmark(state, self.name, self.replication_key)) @@ -131,7 +139,7 @@ def get_objects(self, **kwargs): ''' url = self.endpoint.format(self.config['subdomain']) # Pass `request_timeout` parameter - for page in http.get_cursor_based(url, self.config['access_token'], self.request_timeout, **kwargs): + for page in http.get_cursor_based(url, self.config['access_token'], self.request_timeout, self.page_size, **kwargs): yield from page[self.item_key] class CursorBasedExportStream(Stream): @@ -348,7 +356,7 @@ class TicketAudits(Stream): def get_objects(self, ticket_id): url = self.endpoint.format(self.config['subdomain'], ticket_id) # Pass `request_timeout` parameter - pages = http.get_offset_based(url, self.config['access_token'], self.request_timeout) + pages = http.get_offset_based(url, self.config['access_token'], self.request_timeout, self.page_size) for page in pages: yield from page[self.item_key] @@ -383,7 +391,7 @@ def sync(self, ticket_id): # Only 1 ticket metric per ticket url = self.endpoint.format(self.config['subdomain'], ticket_id) # Pass `request_timeout` - pages = http.get_offset_based(url, self.config['access_token'], self.request_timeout) + pages = http.get_offset_based(url, self.config['access_token'], self.request_timeout, self.page_size) for page in pages: zendesk_metrics.capture('ticket_metric') self.count += 1 @@ -439,7 +447,7 @@ class TicketComments(Stream): def get_objects(self, ticket_id): url = self.endpoint.format(self.config['subdomain'], ticket_id) # Pass `request_timeout` parameter - pages = http.get_offset_based(url, self.config['access_token'], self.request_timeout) + pages = http.get_offset_based(url, self.config['access_token'], self.request_timeout, self.page_size) for page in pages: yield from page[self.item_key] From c26130e3537d30ee4ce267afc84ab1833d7cefb6 Mon Sep 17 00:00:00 2001 From: Bryant Gray Date: Tue, 17 Oct 2023 18:04:20 +0000 Subject: [PATCH 2/3] fix unit tests --- test/unittests/test_http.py | 119 ++++++++++++++----------- test/unittests/test_request_timeout.py | 17 +++- 2 files changed, 78 insertions(+), 58 deletions(-) diff --git a/test/unittests/test_http.py b/test/unittests/test_http.py index f8d8059..99c9fcf 100644 --- a/test/unittests/test_http.py +++ b/test/unittests/test_http.py @@ -43,6 +43,7 @@ def json(self): PAGINATE_RESPONSE = {"meta": {"has_more": True, "after_cursor": "some_cursor"}} +PAGE_SIZE = 100 REQUEST_TIMEOUT = 300 @@ -76,11 +77,10 @@ class TestBackoff(unittest.TestCase): def test_get_cursor_based_gets_one_page(self, mock_get, mock_sleep): responses = [ response - for response in http.get_cursor_based( - url="some_url", - access_token="some_token", - request_timeout=REQUEST_TIMEOUT, - ) + for response in http.get_cursor_based(url="some_url", + access_token="some_token", + request_timeout=REQUEST_TIMEOUT, + page_size=PAGE_SIZE) ] actual_response = responses[0] self.assertDictEqual(SINGLE_RESPONSE, actual_response) @@ -99,11 +99,10 @@ def test_get_cursor_based_gets_one_page(self, mock_get, mock_sleep): def test_get_cursor_based_can_paginate(self, mock_get, mock_sleep): responses = [ response - for response in http.get_cursor_based( - url="some_url", - access_token="some_token", - request_timeout=REQUEST_TIMEOUT, - ) + for response in http.get_cursor_based(url="some_url", + access_token="some_token", + request_timeout=REQUEST_TIMEOUT, + page_size=PAGE_SIZE) ] self.assertDictEqual({"key1": "val1", **PAGINATE_RESPONSE}, responses[0]) @@ -137,11 +136,10 @@ def test_get_cursor_based_handles_429(self, mock_get, mock_sleep): """ responses = [ response - for response in http.get_cursor_based( - url="some_url", - access_token="some_token", - request_timeout=REQUEST_TIMEOUT, - ) + for response in http.get_cursor_based(url="some_url", + access_token="some_token", + request_timeout=REQUEST_TIMEOUT, + page_size=PAGE_SIZE) ] actual_response = responses[0] self.assertDictEqual({"key1": "val1", **SINGLE_RESPONSE}, actual_response) @@ -157,9 +155,10 @@ def test_get_cursor_based_handles_400(self, mock_get, mock_sleep): try: responses = [ response - for response in http.get_cursor_based( - url="some_url", access_token="some_token", request_timeout=300 - ) + for response in http.get_cursor_based(url="some_url", + access_token="some_token", + request_timeout=300, + page_size=PAGE_SIZE) ] except http.ZendeskBadRequestError as e: @@ -182,9 +181,10 @@ def test_get_cursor_based_handles_400_api_error_message(self, mock_get, mock_sle try: responses = [ response - for response in http.get_cursor_based( - url="some_url", access_token="some_token", request_timeout=300 - ) + for response in http.get_cursor_based(url="some_url", + access_token="some_token", + request_timeout=300, + page_size=PAGE_SIZE) ] except http.ZendeskBadRequestError as e: @@ -204,9 +204,10 @@ def test_get_cursor_based_handles_401(self, mock_get, mock_sleep): try: responses = [ response - for response in http.get_cursor_based( - url="some_url", access_token="some_token", request_timeout=300 - ) + for response in http.get_cursor_based(url="some_url", + access_token="some_token", + request_timeout=300, + page_size=PAGE_SIZE) ] except http.ZendeskUnauthorizedError as e: expected_error_message = ( @@ -226,9 +227,10 @@ def test_get_cursor_based_handles_404(self, mock_get, mock_sleep): try: responses = [ response - for response in http.get_cursor_based( - url="some_url", access_token="some_token", request_timeout=300 - ) + for response in http.get_cursor_based(url="some_url", + access_token="some_token", + request_timeout=300, + page_size=PAGE_SIZE) ] except http.ZendeskNotFoundError as e: expected_error_message = "HTTP-error-code: 404, Error: The resource you have specified cannot be found." @@ -247,9 +249,10 @@ def test_get_cursor_based_handles_409(self, mocked_request, mock_api_token): with self.assertRaises(http.ZendeskConflictError) as e: responses = [ response - for response in http.get_cursor_based( - url="some_url", access_token="some_token", request_timeout=300 - ) + for response in http.get_cursor_based(url="some_url", + access_token="some_token", + request_timeout=300, + page_size=PAGE_SIZE) ] expected_error_message = "HTTP-error-code: 409, Error: The API request cannot be completed because the requested operation would conflict with an existing item." self.assertEqual(str(e), expected_error_message) @@ -264,9 +267,10 @@ def test_get_cursor_based_handles_422(self, mock_get, mock_sleep): try: responses = [ response - for response in http.get_cursor_based( - url="some_url", access_token="some_token", request_timeout=300 - ) + for response in http.get_cursor_based(url="some_url", + access_token="some_token", + request_timeout=300, + page_size=PAGE_SIZE) ] except http.ZendeskUnprocessableEntityError as e: expected_error_message = "HTTP-error-code: 422, Error: The request content itself is not processable by the server." @@ -287,9 +291,10 @@ def test_get_cursor_based_handles_500(self, mock_get, mock_sleep): try: responses = [ response - for response in http.get_cursor_based( - url="some_url", access_token="some_token", request_timeout=300 - ) + for response in http.get_cursor_based(url="some_url", + access_token="some_token", + request_timeout=300, + page_size=PAGE_SIZE) ] except http.ZendeskInternalServerError as e: expected_error_message = ( @@ -313,9 +318,10 @@ def test_get_cursor_based_handles_501(self, mock_get, mock_sleep): try: responses = [ response - for response in http.get_cursor_based( - url="some_url", access_token="some_token", request_timeout=300 - ) + for response in http.get_cursor_based(url="some_url", + access_token="some_token", + request_timeout=300, + page_size=PAGE_SIZE) ] except http.ZendeskNotImplementedError as e: expected_error_message = "HTTP-error-code: 501, Error: The server does not support the functionality required to fulfill the request." @@ -336,9 +342,10 @@ def test_get_cursor_based_handles_502(self, mock_get, mock_sleep): try: responses = [ response - for response in http.get_cursor_based( - url="some_url", access_token="some_token", request_timeout=300 - ) + for response in http.get_cursor_based(url="some_url", + access_token="some_token", + request_timeout=300, + page_size=PAGE_SIZE) ] except http.ZendeskBadGatewayError as e: expected_error_message = ( @@ -359,9 +366,10 @@ def test_get_cursor_based_handles_444(self, mock_get, mock_sleep): try: responses = [ response - for response in http.get_cursor_based( - url="some_url", access_token="some_token", request_timeout=300 - ) + for response in http.get_cursor_based(url="some_url", + access_token="some_token", + request_timeout=300, + page_size=PAGE_SIZE) ] except http.ZendeskError as e: expected_error_message = "HTTP-error-code: 444, Error: Unknown Error" @@ -422,9 +430,10 @@ def test_get_cursor_based_handles_524(self, mock_get, mock_sleep): try: responses = [ response - for response in http.get_cursor_based( - url="some_url", access_token="some_token", request_timeout=300 - ) + for response in http.get_cursor_based(url="some_url", + access_token="some_token", + request_timeout=300, + page_size=PAGE_SIZE) ] except http.ZendeskError as e: expected_error_message = "HTTP-error-code: 524, Error: Unknown Error" @@ -445,9 +454,10 @@ def test_get_cursor_based_handles_520(self, mock_get, mock_sleep): try: responses = [ response - for response in http.get_cursor_based( - url="some_url", access_token="some_token", request_timeout=300 - ) + for response in http.get_cursor_based(url="some_url", + access_token="some_token", + request_timeout=300, + page_size=PAGE_SIZE) ] except http.ZendeskError as e: expected_error_message = "HTTP-error-code: 520, Error: Unknown Error" @@ -468,9 +478,10 @@ def test_get_cursor_based_handles_503(self, mock_get, mock_sleep): try: responses = [ response - for response in http.get_cursor_based( - url="some_url", access_token="some_token", request_timeout=300 - ) + for response in http.get_cursor_based(url="some_url", + access_token="some_token", + request_timeout=300, + page_size=PAGE_SIZE) ] except http.ZendeskServiceUnavailableError as e: expected_error_message = ( diff --git a/test/unittests/test_request_timeout.py b/test/unittests/test_request_timeout.py index 9125568..785c06f 100644 --- a/test/unittests/test_request_timeout.py +++ b/test/unittests/test_request_timeout.py @@ -14,6 +14,7 @@ REQUEST_TIMEOUT = 300 REQUEST_TIMEOUT_STR = "300" REQUEST_TIMEOUT_FLOAT = 300.05 +PAGE_SIZE = 100 SINGLE_RESPONSE = { 'meta': {'has_more': False} @@ -58,7 +59,9 @@ def test_get_cursor_based_handles_timeout_error(self, mock_get, mock_sleep): try: responses = [response for response in http.get_cursor_based(url='some_url', - access_token='some_token', request_timeout=REQUEST_TIMEOUT)] + access_token='some_token', + request_timeout=REQUEST_TIMEOUT, + page_size=PAGE_SIZE)] except requests.exceptions.Timeout as e: pass @@ -74,7 +77,9 @@ def test_get_cursor_based_handles_timeout_error_in_pagination_call(self, mock_ge try: responses = [response for response in http.get_cursor_based(url='some_url', - access_token='some_token', request_timeout=REQUEST_TIMEOUT)] + access_token='some_token', + request_timeout=REQUEST_TIMEOUT, + page_size=PAGE_SIZE)] except requests.exceptions.Timeout as e: pass @@ -88,7 +93,9 @@ def test_get_offset_based_handles_timeout_error(self, mock_get, mock_sleep): try: responses = [response for response in http.get_offset_based(url='some_url', - access_token='some_token', request_timeout=REQUEST_TIMEOUT)] + access_token='some_token', + request_timeout=REQUEST_TIMEOUT, + page_size=PAGE_SIZE)] except requests.exceptions.Timeout as e: pass @@ -104,7 +111,9 @@ def test_get_offset_based_handles_timeout_error_in_pagination_call(self, mock_ge try: responses = [response for response in http.get_offset_based(url='some_url', - access_token='some_token', request_timeout=REQUEST_TIMEOUT)] + access_token='some_token', + request_timeout=REQUEST_TIMEOUT, + page_size=PAGE_SIZE)] except requests.exceptions.Timeout as e: pass From f6b2cc0fc4e9b862e54f0448ac0591681cb010be Mon Sep 17 00:00:00 2001 From: Bryant Gray Date: Tue, 17 Oct 2023 18:15:25 +0000 Subject: [PATCH 3/3] Bump version and update changelog --- CHANGELOG.md | 2 ++ setup.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 88bfc5f..6f3d85d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ # Changelog +## 2.3.0 + * Adds configurable page size for requests [#141](https://github.com/singer-io/tap-zendesk/pull/141) ## 2.2.0 * Adds Support for lookup fields [#124](https://github.com/singer-io/tap-zendesk/pull/124) diff --git a/setup.py b/setup.py index 68bd77a..85af1d0 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup setup(name='tap-zendesk', - version='2.2.0', + version='2.3.0', description='Singer.io tap for extracting data from the Zendesk API', author='Stitch', url='https://singer.io',