diff --git a/CHANGELOG.md b/CHANGELOG.md index 1832d6c..1a5fd21 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ # Changelog +## 1.7.6 + * Fix Infinite Loop for Users [#103](https://github.com/singer-io/tap-zendesk/pull/103) ## 1.7.5 * Added support for backoff and retry for error 409 [#107](https://github.com/singer-io/tap-zendesk/pull/107) * Code Formatting [#107](https://github.com/singer-io/tap-zendesk/pull/107) diff --git a/setup.py b/setup.py index a1b1f6e..c46345b 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup setup(name='tap-zendesk', - version='1.7.5', + version='1.7.6', description='Singer.io tap for extracting data from the Zendesk API', author='Stitch', url='https://singer.io', diff --git a/tap_zendesk/streams.py b/tap_zendesk/streams.py index 67b3094..df16a82 100644 --- a/tap_zendesk/streams.py +++ b/tap_zendesk/streams.py @@ -77,6 +77,10 @@ def __init__(self, client=None, config=None): else: self.request_timeout = REQUEST_TIMEOUT # If value is 0,"0","" or not passed then it set default to 300 seconds. + # To avoid infinite loop behavior we should not configure search window less than 2 + if config.get('search_window_size') and int(config.get('search_window_size')) < 2: + raise ValueError('Search window size cannot be less than 2') + def get_bookmark(self, state): return utils.strptime_with_tz(singer.get_bookmark(state, self.name, self.replication_key)) @@ -244,20 +248,21 @@ def sync(self, state): while start < sync_end: parsed_start = singer.strftime(start, "%Y-%m-%dT%H:%M:%SZ") parsed_end = min(singer.strftime(end, "%Y-%m-%dT%H:%M:%SZ"), parsed_sync_end) - LOGGER.info("Querying for users between %s and %s", parsed_start, parsed_end) + LOGGER.info("Querying for users with window of exclusive boundaries between %s and %s", parsed_start, parsed_end) users = self.client.search("", updated_after=parsed_start, updated_before=parsed_end, type="user") # NB: Zendesk will return an error on the 1001st record, so we # need to check total response size before iterating # See: https://develop.zendesk.com/hc/en-us/articles/360022563994--BREAKING-New-Search-API-Result-Limits if users.count > 1000: - if search_window_size > 1: + # To avoid infinite loop behavior we should reduce the window if it is greater than 2 + if search_window_size > 2: search_window_size = search_window_size // 2 end = start + datetime.timedelta(seconds=search_window_size) LOGGER.info("users - Detected Search API response size too large. Cutting search window in half to %s seconds.", search_window_size) continue - raise Exception("users - Unable to get all users within minimum window of a single second ({}), found {} users within this timestamp. Zendesk can only provide a maximum of 1000 users per request. See: https://develop.zendesk.com/hc/en-us/articles/360022563994--BREAKING-New-Search-API-Result-Limits".format(parsed_start, users.count)) + raise Exception("users - Unable to get all users within minimum window of a single second ({}), found {} users within this timestamp. Zendesk can only provide a maximum of 1000 users per request. See: https://develop.zendesk.com/hc/en-us/articles/360022563994--BREAKING-New-Search-API-Result-Limits".format(datetime.datetime.strftime(datetime.datetime.strptime(parsed_start, "%Y-%m-%dT%H:%M:%SZ") + datetime.timedelta(seconds=1), "%Y-%m-%dT%H:%M:%SZ"), users.count)) # Consume the records to account for dates lower than window start users = [user for user in users] # pylint: disable=unnecessary-comprehension diff --git a/test/unittests/test_user_infinite_loop.py b/test/unittests/test_user_infinite_loop.py new file mode 100644 index 0000000..db6d916 --- /dev/null +++ b/test/unittests/test_user_infinite_loop.py @@ -0,0 +1,70 @@ +import unittest +from unittest import mock +import tap_zendesk +from tap_zendesk.streams import Users +from tap_zendesk.streams import STREAMS +import singer +import datetime +from dateutil import tz + + +class MockSearch: + def __init__(self): + self.count = 1001 + self.updated_at = "test" + + def __iter__(self): + return (self for x in range(4)) + + def search(self, test, updated_after, updated_before, type="user" ): + # For window size less than 2 return less than or equal to 1000 records and for larger window return greater than 1000 records + if (singer.strptime(updated_before) - singer.strptime(updated_after)).seconds < 2: + self.count = 999 + else: + self.count = 1001 + return self + +class TestUserSyncCheck(unittest.TestCase): + @mock.patch('tap_zendesk.singer.utils.now', side_effect=lambda : datetime.datetime(2022, 4, 7, 1, 45, 21, 840535, tzinfo=tz.UTC)) + def test_many_records_in_one_seconds_for_user(self, mocked_now): + """ + Reproduce infinite looping behavior for Users stream when user have many record in single seconds + """ + user_obj = Users(MockSearch(), {}) + + with self.assertRaises(Exception) as e: + l = list(user_obj.sync({'bookmarks': {'users': {'updated_at': '2022-03-30T08:45:21.000000Z'}}})) + + self.assertEqual(str(e.exception), 'users - Unable to get all users within minimum window of a single second (2022-03-30T08:45:21Z), found 1001 users within this timestamp. Zendesk can only provide a maximum of 1000 users per request. See: https://develop.zendesk.com/hc/en-us/articles/360022563994--BREAKING-New-Search-API-Result-Limits') + + def test_many_records_in_one_seconds_for_user_with_3_sec_window(self): + """ + To verify that if user give 3 seconds window then also we don't get infinite loop behavior + """ + user_obj = Users(client=MockSearch(), config={'search_window_size': 3}) + + with self.assertRaises(Exception) as e: + l = list(user_obj.sync({'bookmarks': {'users': {'updated_at': '2022-03-30T08:45:21.000000Z'}}})) + + self.assertEqual(str(e.exception), 'users - Unable to get all users within minimum window of a single second (2022-03-30T08:45:21Z), found 1001 users within this timestamp. Zendesk can only provide a maximum of 1000 users per request. See: https://develop.zendesk.com/hc/en-us/articles/360022563994--BREAKING-New-Search-API-Result-Limits') + + def test_many_records_in_one_seconds_for_user_with_2_sec_window(self): + """ + To verify that if user give 2 seconds window then also we don't get infinite loop behavior + """ + user_obj = Users(client=MockSearch(), config={'search_window_size': 2}) + + with self.assertRaises(Exception) as e: + l = list(user_obj.sync({'bookmarks': {'users': {'updated_at': '2022-03-30T08:45:21.000000Z'}}})) + + self.assertEqual(str(e.exception), 'users - Unable to get all users within minimum window of a single second (2022-03-30T08:45:21Z), found 1001 users within this timestamp. Zendesk can only provide a maximum of 1000 users per request. See: https://develop.zendesk.com/hc/en-us/articles/360022563994--BREAKING-New-Search-API-Result-Limits') + + def test_search_window_size_equals_1_sec(self): + """ + To verify that search window size 1 second cannot be configured + """ + + with self.assertRaises(ValueError) as e: + Users(client=MockSearch(), config={'search_window_size': 1}) + + self.assertEqual(str(e.exception), 'Search window size cannot be less than 2')