Skip to content

Commit

Permalink
TDL-6756: Fix Infinite Loop for Users (singer-io#103)
Browse files Browse the repository at this point in the history
* Reproduce infinite loop behaviour in unittest 'test_user_infinite_loop.py' and correct that behaviour

* Correct exception mesaage in unittest

* change logger message

* correct logger message

* add comment in streams.py and add 2sec window test case

* add comment in streams.py and add 2sec window test case

* Change exception message

* Correct comment message

* resolved unitetst error

* update behaviour for 1 second window size

* changelog and verison bump

---------

Co-authored-by: Rushikesh Todkar <98420315+RushiT0122@users.noreply.github.com>
Co-authored-by: kethan1122 <kcherukuri@talend.com>
  • Loading branch information
3 people authored and karthipillai committed Oct 2, 2023
1 parent 86f41ea commit d9473d9
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 4 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Changelog

## 1.7.6
* Fix Infinite Loop for Users [#103](https://github.com/singer-io/tap-zendesk/pull/103)
## 1.7.5
* Added support for backoff and retry for error 409 [#107](https://github.com/singer-io/tap-zendesk/pull/107)
* Code Formatting [#107](https://github.com/singer-io/tap-zendesk/pull/107)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from setuptools import setup

setup(name='tap-zendesk',
version='1.7.5',
version='1.7.6',
description='Singer.io tap for extracting data from the Zendesk API',
author='Stitch',
url='https://singer.io',
Expand Down
11 changes: 8 additions & 3 deletions tap_zendesk/streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ def __init__(self, client=None, config=None):
else:
self.request_timeout = REQUEST_TIMEOUT # If value is 0,"0","" or not passed then it set default to 300 seconds.

# To avoid infinite loop behavior we should not configure search window less than 2
if config.get('search_window_size') and int(config.get('search_window_size')) < 2:
raise ValueError('Search window size cannot be less than 2')

def get_bookmark(self, state):
return utils.strptime_with_tz(singer.get_bookmark(state, self.name, self.replication_key))

Expand Down Expand Up @@ -244,20 +248,21 @@ def sync(self, state):
while start < sync_end:
parsed_start = singer.strftime(start, "%Y-%m-%dT%H:%M:%SZ")
parsed_end = min(singer.strftime(end, "%Y-%m-%dT%H:%M:%SZ"), parsed_sync_end)
LOGGER.info("Querying for users between %s and %s", parsed_start, parsed_end)
LOGGER.info("Querying for users with window of exclusive boundaries between %s and %s", parsed_start, parsed_end)
users = self.client.search("", updated_after=parsed_start, updated_before=parsed_end, type="user")

# NB: Zendesk will return an error on the 1001st record, so we
# need to check total response size before iterating
# See: https://develop.zendesk.com/hc/en-us/articles/360022563994--BREAKING-New-Search-API-Result-Limits
if users.count > 1000:
if search_window_size > 1:
# To avoid infinite loop behavior we should reduce the window if it is greater than 2
if search_window_size > 2:
search_window_size = search_window_size // 2
end = start + datetime.timedelta(seconds=search_window_size)
LOGGER.info("users - Detected Search API response size too large. Cutting search window in half to %s seconds.", search_window_size)
continue

raise Exception("users - Unable to get all users within minimum window of a single second ({}), found {} users within this timestamp. Zendesk can only provide a maximum of 1000 users per request. See: https://develop.zendesk.com/hc/en-us/articles/360022563994--BREAKING-New-Search-API-Result-Limits".format(parsed_start, users.count))
raise Exception("users - Unable to get all users within minimum window of a single second ({}), found {} users within this timestamp. Zendesk can only provide a maximum of 1000 users per request. See: https://develop.zendesk.com/hc/en-us/articles/360022563994--BREAKING-New-Search-API-Result-Limits".format(datetime.datetime.strftime(datetime.datetime.strptime(parsed_start, "%Y-%m-%dT%H:%M:%SZ") + datetime.timedelta(seconds=1), "%Y-%m-%dT%H:%M:%SZ"), users.count))

# Consume the records to account for dates lower than window start
users = [user for user in users] # pylint: disable=unnecessary-comprehension
Expand Down
70 changes: 70 additions & 0 deletions test/unittests/test_user_infinite_loop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import unittest
from unittest import mock
import tap_zendesk
from tap_zendesk.streams import Users
from tap_zendesk.streams import STREAMS
import singer
import datetime
from dateutil import tz


class MockSearch:
def __init__(self):
self.count = 1001
self.updated_at = "test"

def __iter__(self):
return (self for x in range(4))

def search(self, test, updated_after, updated_before, type="user" ):
# For window size less than 2 return less than or equal to 1000 records and for larger window return greater than 1000 records
if (singer.strptime(updated_before) - singer.strptime(updated_after)).seconds < 2:
self.count = 999
else:
self.count = 1001
return self

class TestUserSyncCheck(unittest.TestCase):
@mock.patch('tap_zendesk.singer.utils.now', side_effect=lambda : datetime.datetime(2022, 4, 7, 1, 45, 21, 840535, tzinfo=tz.UTC))
def test_many_records_in_one_seconds_for_user(self, mocked_now):
"""
Reproduce infinite looping behavior for Users stream when user have many record in single seconds
"""
user_obj = Users(MockSearch(), {})

with self.assertRaises(Exception) as e:
l = list(user_obj.sync({'bookmarks': {'users': {'updated_at': '2022-03-30T08:45:21.000000Z'}}}))

self.assertEqual(str(e.exception), 'users - Unable to get all users within minimum window of a single second (2022-03-30T08:45:21Z), found 1001 users within this timestamp. Zendesk can only provide a maximum of 1000 users per request. See: https://develop.zendesk.com/hc/en-us/articles/360022563994--BREAKING-New-Search-API-Result-Limits')

def test_many_records_in_one_seconds_for_user_with_3_sec_window(self):
"""
To verify that if user give 3 seconds window then also we don't get infinite loop behavior
"""
user_obj = Users(client=MockSearch(), config={'search_window_size': 3})

with self.assertRaises(Exception) as e:
l = list(user_obj.sync({'bookmarks': {'users': {'updated_at': '2022-03-30T08:45:21.000000Z'}}}))

self.assertEqual(str(e.exception), 'users - Unable to get all users within minimum window of a single second (2022-03-30T08:45:21Z), found 1001 users within this timestamp. Zendesk can only provide a maximum of 1000 users per request. See: https://develop.zendesk.com/hc/en-us/articles/360022563994--BREAKING-New-Search-API-Result-Limits')

def test_many_records_in_one_seconds_for_user_with_2_sec_window(self):
"""
To verify that if user give 2 seconds window then also we don't get infinite loop behavior
"""
user_obj = Users(client=MockSearch(), config={'search_window_size': 2})

with self.assertRaises(Exception) as e:
l = list(user_obj.sync({'bookmarks': {'users': {'updated_at': '2022-03-30T08:45:21.000000Z'}}}))

self.assertEqual(str(e.exception), 'users - Unable to get all users within minimum window of a single second (2022-03-30T08:45:21Z), found 1001 users within this timestamp. Zendesk can only provide a maximum of 1000 users per request. See: https://develop.zendesk.com/hc/en-us/articles/360022563994--BREAKING-New-Search-API-Result-Limits')

def test_search_window_size_equals_1_sec(self):
"""
To verify that search window size 1 second cannot be configured
"""

with self.assertRaises(ValueError) as e:
Users(client=MockSearch(), config={'search_window_size': 1})

self.assertEqual(str(e.exception), 'Search window size cannot be less than 2')

0 comments on commit d9473d9

Please sign in to comment.