Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TDL-6756: Fix Infinite Loop for Users #103

Merged
merged 11 commits into from
May 25, 2023
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Changelog

## 1.7.6
* Fix Infinite Loop for Users [#103](https://github.com/singer-io/tap-zendesk/pull/103)
## 1.7.5
* Added support for backoff and retry for error 409 [#107](https://github.com/singer-io/tap-zendesk/pull/107)
* Code Formatting [#107](https://github.com/singer-io/tap-zendesk/pull/107)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from setuptools import setup

setup(name='tap-zendesk',
version='1.7.5',
version='1.7.6',
description='Singer.io tap for extracting data from the Zendesk API',
author='Stitch',
url='https://singer.io',
Expand Down
11 changes: 8 additions & 3 deletions tap_zendesk/streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ def __init__(self, client=None, config=None):
else:
self.request_timeout = REQUEST_TIMEOUT # If value is 0,"0","" or not passed then it set default to 300 seconds.

# To avoid infinite loop behavior we should not configure search window less than 2
if config.get('search_window_size') and int(config.get('search_window_size')) < 2:
raise ValueError('Search window size cannot be less than 2')

def get_bookmark(self, state):
return utils.strptime_with_tz(singer.get_bookmark(state, self.name, self.replication_key))

Expand Down Expand Up @@ -244,20 +248,21 @@ def sync(self, state):
while start < sync_end:
parsed_start = singer.strftime(start, "%Y-%m-%dT%H:%M:%SZ")
parsed_end = min(singer.strftime(end, "%Y-%m-%dT%H:%M:%SZ"), parsed_sync_end)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good to have, elaborate why we need to take min of 2 timestamps.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tap is using window so if 'end' is minimum than window size then consider it otherwise it will use normal 'end' (start + date window)

LOGGER.info("Querying for users between %s and %s", parsed_start, parsed_end)
LOGGER.info("Querying for users with window of exclusive boundaries between %s and %s", parsed_start, parsed_end)
users = self.client.search("", updated_after=parsed_start, updated_before=parsed_end, type="user")

# NB: Zendesk will return an error on the 1001st record, so we
# need to check total response size before iterating
# See: https://develop.zendesk.com/hc/en-us/articles/360022563994--BREAKING-New-Search-API-Result-Limits
if users.count > 1000:
if search_window_size > 1:
# To avoid infinite loop behavior we should reduce the window if it is greater than 2
if search_window_size > 2:
dbshah1212 marked this conversation as resolved.
Show resolved Hide resolved
search_window_size = search_window_size // 2
end = start + datetime.timedelta(seconds=search_window_size)
LOGGER.info("users - Detected Search API response size too large. Cutting search window in half to %s seconds.", search_window_size)
continue

raise Exception("users - Unable to get all users within minimum window of a single second ({}), found {} users within this timestamp. Zendesk can only provide a maximum of 1000 users per request. See: https://develop.zendesk.com/hc/en-us/articles/360022563994--BREAKING-New-Search-API-Result-Limits".format(parsed_start, users.count))
raise Exception("users - Unable to get all users within minimum window of a single second ({}), found {} users within this timestamp. Zendesk can only provide a maximum of 1000 users per request. See: https://develop.zendesk.com/hc/en-us/articles/360022563994--BREAKING-New-Search-API-Result-Limits".format(datetime.datetime.strftime(datetime.datetime.strptime(parsed_start, "%Y-%m-%dT%H:%M:%SZ") + datetime.timedelta(seconds=1), "%Y-%m-%dT%H:%M:%SZ"), users.count))

# Consume the records to account for dates lower than window start
users = [user for user in users] # pylint: disable=unnecessary-comprehension
Expand Down
70 changes: 70 additions & 0 deletions test/unittests/test_user_infinite_loop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import unittest
from unittest import mock
import tap_zendesk
from tap_zendesk.streams import Users
from tap_zendesk.streams import STREAMS
import singer
import datetime
from dateutil import tz


class MockSearch:
def __init__(self):
self.count = 1001
self.updated_at = "test"

def __iter__(self):
return (self for x in range(4))

def search(self, test, updated_after, updated_before, type="user" ):
# For window size less than 2 return less than or equal to 1000 records and for larger window return greater than 1000 records
if (singer.strptime(updated_before) - singer.strptime(updated_after)).seconds < 2:
self.count = 999
else:
self.count = 1001
return self

class TestUserSyncCheck(unittest.TestCase):
@mock.patch('tap_zendesk.singer.utils.now', side_effect=lambda : datetime.datetime(2022, 4, 7, 1, 45, 21, 840535, tzinfo=tz.UTC))
def test_many_records_in_one_seconds_for_user(self, mocked_now):
"""
Reproduce infinite looping behavior for Users stream when user have many record in single seconds
"""
user_obj = Users(MockSearch(), {})

with self.assertRaises(Exception) as e:
l = list(user_obj.sync({'bookmarks': {'users': {'updated_at': '2022-03-30T08:45:21.000000Z'}}}))

self.assertEqual(str(e.exception), 'users - Unable to get all users within minimum window of a single second (2022-03-30T08:45:21Z), found 1001 users within this timestamp. Zendesk can only provide a maximum of 1000 users per request. See: https://develop.zendesk.com/hc/en-us/articles/360022563994--BREAKING-New-Search-API-Result-Limits')

def test_many_records_in_one_seconds_for_user_with_3_sec_window(self):
dbshah1212 marked this conversation as resolved.
Show resolved Hide resolved
"""
To verify that if user give 3 seconds window then also we don't get infinite loop behavior
"""
user_obj = Users(client=MockSearch(), config={'search_window_size': 3})

with self.assertRaises(Exception) as e:
l = list(user_obj.sync({'bookmarks': {'users': {'updated_at': '2022-03-30T08:45:21.000000Z'}}}))

self.assertEqual(str(e.exception), 'users - Unable to get all users within minimum window of a single second (2022-03-30T08:45:21Z), found 1001 users within this timestamp. Zendesk can only provide a maximum of 1000 users per request. See: https://develop.zendesk.com/hc/en-us/articles/360022563994--BREAKING-New-Search-API-Result-Limits')

def test_many_records_in_one_seconds_for_user_with_2_sec_window(self):
"""
To verify that if user give 2 seconds window then also we don't get infinite loop behavior
"""
user_obj = Users(client=MockSearch(), config={'search_window_size': 2})

with self.assertRaises(Exception) as e:
l = list(user_obj.sync({'bookmarks': {'users': {'updated_at': '2022-03-30T08:45:21.000000Z'}}}))

self.assertEqual(str(e.exception), 'users - Unable to get all users within minimum window of a single second (2022-03-30T08:45:21Z), found 1001 users within this timestamp. Zendesk can only provide a maximum of 1000 users per request. See: https://develop.zendesk.com/hc/en-us/articles/360022563994--BREAKING-New-Search-API-Result-Limits')

def test_search_window_size_equals_1_sec(self):
"""
To verify that search window size 1 second cannot be configured
"""

with self.assertRaises(ValueError) as e:
Users(client=MockSearch(), config={'search_window_size': 1})

self.assertEqual(str(e.exception), 'Search window size cannot be less than 2')