singer-io · KrisPersonal · Dec 7, 2021 · Oct 19, 2021 · Oct 20, 2021 · Oct 21, 2021
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -14,12 +14,19 @@ jobs:
             virtualenv -p python3 /usr/local/share/virtualenvs/tap-zendesk
             source /usr/local/share/virtualenvs/tap-zendesk/bin/activate
             pip install .[test]
+            pip install coverage
       - run:
           name: 'pylint'
           command: |
             source /usr/local/share/virtualenvs/tap-zendesk/bin/activate
-            make test
+            pylint tap_zendesk -d missing-docstring,invalid-name,line-too-long,too-many-locals,too-few-public-methods,fixme,stop-iteration-return,too-many-branches,useless-import-alias,no-else-return,logging-not-lazy
+            nosetests --with-coverage --cover-erase --cover-package=tap_zendesk --cover-html-dir=htmlcov test/unittests
+            coverage html
       - add_ssh_keys
+      - store_test_results:
+          path: test_output/report.xml
+      - store_artifacts:
+          path: htmlcov
       - run:
           name: 'Integration Tests'
           command: |

diff --git a/README.md b/README.md
@@ -19,7 +19,8 @@ OAuth is the default authentication method for `tap-zendesk`. To use OAuth, you
 {
   "access_token": "AVERYLONGOAUTHTOKEN",
   "subdomain": "acme",
-  "start_date": "2000-01-01T00:00:00Z"
+  "start_date": "2000-01-01T00:00:00Z",
+  "request_timeout": 300
 }
 ```
 
@@ -33,7 +34,8 @@ For a simplified, but less granular setup, you can use the API Token authenticat
   "email": "user@domain.com",
   "api_token": "THISISAVERYLONGTOKEN",
   "subdomain": "acme",
-  "start_date": "2000-01-01T00:00:00Z"
+  "start_date": "2000-01-01T00:00:00Z",
+  "request_timeout": 300
 }
 ```
 

diff --git a/tap_zendesk/__init__.py b/tap_zendesk/__init__.py
@@ -15,6 +15,7 @@
 
 LOGGER = singer.get_logger()
 
+REQUEST_TIMEOUT = 300
 REQUIRED_CONFIG_KEYS = [
     "start_date",
     "subdomain",
@@ -190,10 +191,14 @@ def get_session(config):
 def main():
     parsed_args = singer.utils.parse_args(REQUIRED_CONFIG_KEYS)
 
+    # Set request timeout to config param `request_timeout` value.
+    # If value is 0,"0","" or not passed then it set default to 300 seconds.
+    config_request_timeout = parsed_args.config.get('request_timeout')
+    request_timeout = config_request_timeout and float(config_request_timeout) or REQUEST_TIMEOUT # pylint: disable=consider-using-ternary
     # OAuth has precedence
     creds = oauth_auth(parsed_args) or api_token_auth(parsed_args)
     session = get_session(parsed_args.config)
-    client = Zenpy(session=session, **creds)
+    client = Zenpy(session=session, timeout=request_timeout, **creds) # Pass request timeout
 
     if not client:
         LOGGER.error("""No suitable authentication keys provided.""")

diff --git a/tap_zendesk/http.py b/tap_zendesk/http.py
@@ -2,7 +2,7 @@
 import backoff
 import requests
 import singer
-
+from requests.exceptions import Timeout
 
 LOGGER = singer.get_logger()
 
@@ -22,12 +22,14 @@ def is_fatal(exception):
                       requests.exceptions.HTTPError,
                       max_tries=10,
                       giveup=is_fatal)
-def call_api(url, params, headers):
-    response = requests.get(url, params=params, headers=headers)
+@backoff.on_exception(backoff.expo,Timeout, #As timeout error does not have attribute status_code, hence giveup does not work in this case.
+                      max_tries=10) # So, here we added another backoff expression.
+def call_api(url, request_timeout, params, headers):
+    response = requests.get(url, params=params, headers=headers, timeout=request_timeout) # Pass request timeout
     response.raise_for_status()
     return response
 
-def get_cursor_based(url, access_token, cursor=None, **kwargs):
+def get_cursor_based(url, access_token, request_timeout, cursor=None, **kwargs):
     headers = {
         'Content-Type': 'application/json',
         'Accept': 'application/json',
@@ -43,7 +45,7 @@ def get_cursor_based(url, access_token, cursor=None, **kwargs):
     if cursor:
         params['page[after]'] = cursor
 
-    response = call_api(url, params=params, headers=headers)
+    response = call_api(url, request_timeout, params=params, headers=headers)
     response_json = response.json()
 
     yield response_json
@@ -54,13 +56,13 @@ def get_cursor_based(url, access_token, cursor=None, **kwargs):
         cursor = response_json['meta']['after_cursor']
         params['page[after]'] = cursor
 
-        response = call_api(url, params=params, headers=headers)
+        response = call_api(url, request_timeout, params=params, headers=headers)
         response_json = response.json()
 
         yield response_json
         has_more = response_json['meta']['has_more']
 
-def get_offset_based(url, access_token, **kwargs):
+def get_offset_based(url, access_token, request_timeout, **kwargs):
     headers = {
         'Content-Type': 'application/json',
         'Accept': 'application/json',
@@ -73,21 +75,21 @@ def get_offset_based(url, access_token, **kwargs):
         **kwargs.get('params', {})
     }
 
-    response = call_api(url, params=params, headers=headers)
+    response = call_api(url, request_timeout, params=params, headers=headers)
     response_json = response.json()
 
     yield response_json
 
     next_url = response_json.get('next_page')
 
     while next_url:
-        response = call_api(next_url, params=None, headers=headers)
+        response = call_api(next_url, request_timeout, params=None, headers=headers)
         response_json = response.json()
 
         yield response_json
         next_url = response_json.get('next_page')
 
-def get_incremental_export(url, access_token, start_time):
+def get_incremental_export(url, access_token, request_timeout, start_time):
     headers = {
         'Content-Type': 'application/json',
         'Accept': 'application/json',
@@ -96,7 +98,7 @@ def get_incremental_export(url, access_token, start_time):
 
     params = {'start_time': start_time.timestamp()}
 
-    response = call_api(url, params=params, headers=headers)
+    response = call_api(url, request_timeout, params=params, headers=headers)
     response_json = response.json()
 
     yield response_json

diff --git a/tap_zendesk/streams.py b/tap_zendesk/streams.py
@@ -15,6 +15,7 @@
 
 LOGGER = singer.get_logger()
 KEY_PROPERTIES = ['id']
+REQUEST_TIMEOUT = 300
 
 CUSTOM_TYPES = {
     'text': 'string',
@@ -113,7 +114,12 @@ def get_objects(self, **kwargs):
         '''
         url = self.endpoint.format(self.config['subdomain'])
 
-        for page in http.get_cursor_based(url, self.config['access_token'], **kwargs):
+        # Set and pass request timeout to config param `request_timeout` value.
+        # If value is 0,"0","" or not passed then it set default to 300 seconds.
+        config_request_timeout = self.config.get('request_timeout')
+        request_timeout = config_request_timeout and float(config_request_timeout) or REQUEST_TIMEOUT # pylint: disable=consider-using-ternary
+
+        for page in http.get_cursor_based(url, self.config['access_token'], request_timeout, **kwargs):
             yield from page[self.item_key]
 
 class CursorBasedExportStream(Stream):
@@ -126,7 +132,11 @@ def get_objects(self, start_time):
         '''
         url = self.endpoint.format(self.config['subdomain'])
 
-        for page in http.get_incremental_export(url, self.config['access_token'], start_time):
+        # Set and pass request timeout to config param `request_timeout` value.
+        # If value is 0,"0","" or not passed then it set default to 300 seconds.
+        config_request_timeout = self.config.get('request_timeout')
+        request_timeout = config_request_timeout and float(config_request_timeout) or REQUEST_TIMEOUT # pylint: disable=consider-using-ternary
+        for page in http.get_incremental_export(url, self.config['access_token'], request_timeout, start_time):
             yield from page[self.item_key]
 
 
@@ -366,7 +376,11 @@ class TicketAudits(Stream):
 
     def get_objects(self, ticket_id):
         url = self.endpoint.format(self.config['subdomain'], ticket_id)
-        pages = http.get_offset_based(url, self.config['access_token'])
+        # Set and pass request timeout to config param `request_timeout` value.
+        # If value is 0,"0","" or not passed then it set default to 300 seconds.
+        config_request_timeout = self.config.get('request_timeout')
+        request_timeout = config_request_timeout and float(config_request_timeout) or REQUEST_TIMEOUT # pylint: disable=consider-using-ternary
+        pages = http.get_offset_based(url, self.config['access_token'], request_timeout)
         for page in pages:
             yield from page[self.item_key]
 
@@ -387,7 +401,11 @@ class TicketMetrics(CursorBasedStream):
     def sync(self, ticket_id):
         # Only 1 ticket metric per ticket
         url = self.endpoint.format(self.config['subdomain'], ticket_id)
-        pages = http.get_offset_based(url, self.config['access_token'])
+        # Set and pass request timeout to config param `request_timeout` value.
+        # If value is 0,"0","" or not passed then it set default to 300 seconds.
+        config_request_timeout = self.config.get('request_timeout')
+        request_timeout = config_request_timeout and float(config_request_timeout) or REQUEST_TIMEOUT # pylint: disable=consider-using-ternary
+        pages = http.get_offset_based(url, self.config['access_token'], request_timeout)
         for page in pages:
             zendesk_metrics.capture('ticket_metric')
             self.count += 1
@@ -402,7 +420,11 @@ class TicketComments(Stream):
 
     def get_objects(self, ticket_id):
         url = self.endpoint.format(self.config['subdomain'], ticket_id)
-        pages = http.get_offset_based(url, self.config['access_token'])
+        # Set and pass request timeout to config param `request_timeout` value.
+        # If value is 0,"0","" or not passed then it set default to 300 seconds.
+        config_request_timeout = self.config.get('request_timeout')
+        request_timeout = config_request_timeout and float(config_request_timeout) or REQUEST_TIMEOUT # pylint: disable=consider-using-ternary
+        pages = http.get_offset_based(url, self.config['access_token'], request_timeout)
 
         for page in pages:
             yield from page[self.item_key]

diff --git a/test/unittests/test_http.py b/test/unittests/test_http.py
@@ -13,6 +13,7 @@
              'after_cursor': 'some_cursor'}
 }
 
+REQUEST_TIMEOUT = 300
 def mocked_get(*args, **kwargs):
     fake_response = requests.models.Response()
     fake_response.headers.update(kwargs.get('headers', {}))
@@ -36,7 +37,7 @@ class TestBackoff(unittest.TestCase):
            side_effect=[mocked_get(status_code=200, json=SINGLE_RESPONSE)])
     def test_get_cursor_based_gets_one_page(self, mock_get):
         responses = [response for response in http.get_cursor_based(url='some_url',
-                                                                    access_token='some_token')]
+                                                                    access_token='some_token', request_timeout=REQUEST_TIMEOUT)]
         actual_response = responses[0]
         self.assertDictEqual(SINGLE_RESPONSE,
                              actual_response)
@@ -53,7 +54,7 @@ def test_get_cursor_based_gets_one_page(self, mock_get):
     def test_get_cursor_based_can_paginate(self, mock_get):
         responses = [response
                      for response in http.get_cursor_based(url='some_url',
-                                                           access_token='some_token')]
+                                                           access_token='some_token', request_timeout=REQUEST_TIMEOUT)]
 
         self.assertDictEqual({"key1": "val1", **PAGINATE_RESPONSE},
                               responses[0])
@@ -78,7 +79,7 @@ def test_get_cursor_based_handles_429(self, mock_get):
         - can handle either a string or an integer for the retry header
         """
         responses = [response for response in http.get_cursor_based(url='some_url',
-                                                                    access_token='some_token')]
+                                                                    access_token='some_token', request_timeout=REQUEST_TIMEOUT)]
         actual_response = responses[0]
         self.assertDictEqual({"key1": "val1", **SINGLE_RESPONSE},
                              actual_response)