From 7f239298022e6d67634127d2771edc9444194b8d Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Mon, 22 Aug 2022 06:50:26 +0000 Subject: [PATCH 01/32] TDL-20356 implement function based to class based --- setup.py | 4 +- tap_freshdesk/__init__.py | 270 +++----------------------------------- tap_freshdesk/client.py | 56 ++++++++ tap_freshdesk/discover.py | 33 +++++ tap_freshdesk/schema.py | 49 +++++++ tap_freshdesk/streams.py | 123 +++++++++++++++++ tap_freshdesk/sync.py | 69 ++++++++++ 7 files changed, 351 insertions(+), 253 deletions(-) create mode 100644 tap_freshdesk/client.py create mode 100644 tap_freshdesk/discover.py create mode 100644 tap_freshdesk/schema.py create mode 100644 tap_freshdesk/streams.py create mode 100644 tap_freshdesk/sync.py diff --git a/setup.py b/setup.py index b4f3170..67d9ead 100644 --- a/setup.py +++ b/setup.py @@ -10,9 +10,9 @@ classifiers=['Programming Language :: Python :: 3 :: Only'], py_modules=['tap_freshdesk'], install_requires=[ - 'singer-python==5.2.3', + 'singer-python==5.12.2', 'requests==2.20.0', - 'backoff==1.3.2' + 'backoff==1.8.0' ], entry_points=''' [console_scripts] diff --git a/tap_freshdesk/__init__.py b/tap_freshdesk/__init__.py index a79b60c..e1956c3 100644 --- a/tap_freshdesk/__init__.py +++ b/tap_freshdesk/__init__.py @@ -1,259 +1,27 @@ #!/usr/bin/env python3 - -import sys -import time - -import backoff -import requests -from requests.exceptions import HTTPError import singer +from singer import utils +from tap_freshdesk.discover import discover as _discover +from tap_freshdesk.sync import sync as _sync +from tap_freshdesk.client import FreshdeskClient -from tap_freshdesk import utils - - -REQUIRED_CONFIG_KEYS = ['api_key', 'domain', 'start_date'] -PER_PAGE = 100 -BASE_URL = "https://{}.freshdesk.com" -CONFIG = {} -STATE = {} - -endpoints = { - "tickets": "/api/v2/tickets", - "sub_ticket": "/api/v2/tickets/{id}/{entity}", - "agents": "/api/v2/agents", - "roles": "/api/v2/roles", - "groups": "/api/v2/groups", - "companies": "/api/v2/companies", - "contacts": "/api/v2/contacts", -} - -logger = singer.get_logger() -session = requests.Session() - - -def get_url(endpoint, **kwargs): - return BASE_URL.format(CONFIG['domain']) + endpoints[endpoint].format(**kwargs) - - -@backoff.on_exception(backoff.expo, - (requests.exceptions.RequestException), - max_tries=5, - giveup=lambda e: e.response is not None and 400 <= e.response.status_code < 500, - factor=2) -@utils.ratelimit(1, 2) -def request(url, params=None): - params = params or {} - headers = {} - if 'user_agent' in CONFIG: - headers['User-Agent'] = CONFIG['user_agent'] - - req = requests.Request('GET', url, params=params, auth=(CONFIG['api_key'], ""), headers=headers).prepare() - logger.info("GET {}".format(req.url)) - resp = session.send(req) - - if 'Retry-After' in resp.headers: - retry_after = int(resp.headers['Retry-After']) - logger.info("Rate limit reached. 
Sleeping for {} seconds".format(retry_after)) - time.sleep(retry_after) - return request(url, params) - - resp.raise_for_status() - - return resp - - -def get_start(entity): - if entity not in STATE: - STATE[entity] = CONFIG['start_date'] - - return STATE[entity] - - -def gen_request(url, params=None): - params = params or {} - params["per_page"] = PER_PAGE - page = 1 - while True: - params['page'] = page - data = request(url, params).json() - for row in data: - yield row - - if len(data) == PER_PAGE: - page += 1 - else: - break - - -def transform_dict(d, key_key="name", value_key="value", force_str=False): - # Custom fields are expected to be strings, but sometimes the API sends - # booleans. We cast those to strings to match the schema. - rtn = [] - for k, v in d.items(): - if force_str: - v = str(v).lower() - rtn.append({key_key: k, value_key: v}) - return rtn - - -def sync_tickets(): - bookmark_property = 'updated_at' - - singer.write_schema("tickets", - utils.load_schema("tickets"), - ["id"], - bookmark_properties=[bookmark_property]) - - singer.write_schema("conversations", - utils.load_schema("conversations"), - ["id"], - bookmark_properties=[bookmark_property]) - - singer.write_schema("satisfaction_ratings", - utils.load_schema("satisfaction_ratings"), - ["id"], - bookmark_properties=[bookmark_property]) - - singer.write_schema("time_entries", - utils.load_schema("time_entries"), - ["id"], - bookmark_properties=[bookmark_property]) +REQUIRED_CONFIG_KEYS = ["start_date", "domain", "api_key"] - sync_tickets_by_filter(bookmark_property) - sync_tickets_by_filter(bookmark_property, "deleted") - sync_tickets_by_filter(bookmark_property, "spam") - - -def sync_tickets_by_filter(bookmark_property, predefined_filter=None): - endpoint = "tickets" - - state_entity = endpoint - if predefined_filter: - state_entity = state_entity + "_" + predefined_filter - - start = get_start(state_entity) - - params = { - 'updated_since': start, - 'order_by': bookmark_property, - 'order_type': "asc", - 'include': "requester,company,stats" - } - - if predefined_filter: - logger.info("Syncing tickets with filter {}".format(predefined_filter)) - - if predefined_filter: - params['filter'] = predefined_filter - - for i, row in enumerate(gen_request(get_url(endpoint), params)): - logger.info("Ticket {}: Syncing".format(row['id'])) - row.pop('attachments', None) - row['custom_fields'] = transform_dict(row['custom_fields'], force_str=True) - - # get all sub-entities and save them - logger.info("Ticket {}: Syncing conversations".format(row['id'])) - - try: - for subrow in gen_request(get_url("sub_ticket", id=row['id'], entity="conversations")): - subrow.pop("attachments", None) - subrow.pop("body", None) - if subrow[bookmark_property] >= start: - singer.write_record("conversations", subrow, time_extracted=singer.utils.now()) - except HTTPError as e: - if e.response.status_code == 403: - logger.info('Invalid ticket ID requested from Freshdesk {0}'.format(row['id'])) - else: - raise - - try: - logger.info("Ticket {}: Syncing satisfaction ratings".format(row['id'])) - for subrow in gen_request(get_url("sub_ticket", id=row['id'], entity="satisfaction_ratings")): - subrow['ratings'] = transform_dict(subrow['ratings'], key_key="question") - if subrow[bookmark_property] >= start: - singer.write_record("satisfaction_ratings", subrow, time_extracted=singer.utils.now()) - except HTTPError as e: - if e.response.status_code == 403: - logger.info("The Surveys feature is unavailable. 
Skipping the satisfaction_ratings stream.") - else: - raise - - try: - logger.info("Ticket {}: Syncing time entries".format(row['id'])) - for subrow in gen_request(get_url("sub_ticket", id=row['id'], entity="time_entries")): - if subrow[bookmark_property] >= start: - singer.write_record("time_entries", subrow, time_extracted=singer.utils.now()) - - except HTTPError as e: - if e.response.status_code == 403: - logger.info("The Timesheets feature is unavailable. Skipping the time_entries stream.") - elif e.response.status_code == 404: - # 404 is being returned for deleted tickets and spam - logger.info("Could not retrieve time entries for ticket id {}. This may be caused by tickets " - "marked as spam or deleted.".format(row['id'])) - else: - raise - - utils.update_state(STATE, state_entity, row[bookmark_property]) - singer.write_record(endpoint, row, time_extracted=singer.utils.now()) - singer.write_state(STATE) - - -def sync_time_filtered(entity): - bookmark_property = 'updated_at' - - singer.write_schema(entity, - utils.load_schema(entity), - ["id"], - bookmark_properties=[bookmark_property]) - start = get_start(entity) - - logger.info("Syncing {} from {}".format(entity, start)) - for row in gen_request(get_url(entity)): - if row[bookmark_property] >= start: - if 'custom_fields' in row: - row['custom_fields'] = transform_dict(row['custom_fields'], force_str=True) - - utils.update_state(STATE, entity, row[bookmark_property]) - singer.write_record(entity, row, time_extracted=singer.utils.now()) - - singer.write_state(STATE) - - -def do_sync(): - logger.info("Starting FreshDesk sync") - - try: - sync_tickets() - sync_time_filtered("agents") - sync_time_filtered("roles") - sync_time_filtered("groups") - # commenting out this high-volume endpoint for now - #sync_time_filtered("contacts") - sync_time_filtered("companies") - except HTTPError as e: - logger.critical( - "Error making request to Freshdesk API: GET %s: [%s - %s]", - e.request.url, e.response.status_code, e.response.content) - sys.exit(1) - - logger.info("Completed sync") - - -def main_impl(): - config, state = utils.parse_args(REQUIRED_CONFIG_KEYS) - CONFIG.update(config) - STATE.update(state) - do_sync() +LOGGER = singer.get_logger() +@utils.handle_top_exception(LOGGER) def main(): - try: - main_impl() - except Exception as exc: - logger.critical(exc) - raise exc - - -if __name__ == '__main__': + args = utils.parse_args(REQUIRED_CONFIG_KEYS) + config = args.config + client = FreshdeskClient(config) + if args.discover: + catalog = _discover() + catalog.dump() + else: + catalog = args.catalog \ + if args.catalog else _discover() + _sync(client, config, args.state, catalog.to_dict()) + +if __name__ == "__main__": main() diff --git a/tap_freshdesk/client.py b/tap_freshdesk/client.py new file mode 100644 index 0000000..dec7b5b --- /dev/null +++ b/tap_freshdesk/client.py @@ -0,0 +1,56 @@ +import time + +import backoff +import requests +import singer +from tap_freshdesk import utils + +LOGGER = singer.get_logger() +DEFAULT_TIMEOUT = 300 +BASE_URL = "https://{}.freshdesk.com" + + +class FreshdeskClient: + + def __init__(self, config): + self.config = config + self.session = requests.Session() + self.base_url = BASE_URL.format(config.get("domain")) + + def __enter__(self): + self.check_access_token() + return self + + def __exit__(self, exception_type, exception_value, traceback): + # Kill the session instance. 
+ self.session.close() + + def check_access_token(self): + self.request(self.base_url+"/api/v2/roles", {"per_page": 1, "page": 1}) + + @backoff.on_exception(backoff.expo, + (requests.exceptions.RequestException), + max_tries=5, + giveup=lambda e: e.response is not None and 400 <= e.response.status_code < 500, + factor=2) + @utils.ratelimit(1, 2) + def request(self, url, params={}): + headers = {} + if 'user_agent' in self.config: + headers['User-Agent'] = self.config['user_agent'] + + req = requests.Request('GET', url, params=params, auth=(self.config['api_key'], ""), headers=headers).prepare() + LOGGER.info("GET {}".format(req.url)) + response = self.session.send(req) + + if 'Retry-After' in response.headers: + retry_after = int(response.headers['Retry-After']) + LOGGER.info("Rate limit reached. Sleeping for {} seconds".format(retry_after)) + time.sleep(retry_after) + return self.request(url, params) + + response.raise_for_status() + + return response.json() + + diff --git a/tap_freshdesk/discover.py b/tap_freshdesk/discover.py new file mode 100644 index 0000000..caebacb --- /dev/null +++ b/tap_freshdesk/discover.py @@ -0,0 +1,33 @@ +import singer +from singer.catalog import Catalog, CatalogEntry, Schema +from tap_freshdesk.schema import get_schemas + +LOGGER = singer.get_logger() + +def discover(): + """ + Run the discovery mode, prepare the catalog file and return the catalog. + """ + schemas, field_metadata = get_schemas() + catalog = Catalog([]) + + for stream_name, schema_dict in schemas.items(): + try: + schema = Schema.from_dict(schema_dict) + mdata = field_metadata[stream_name] + except Exception as err: + LOGGER.error(err) + LOGGER.error('stream_name: %s', stream_name) + LOGGER.error('type schema_dict: %s', type(schema_dict)) + raise err + + key_properties = mdata[0]['metadata'].get('table-key-properties') + catalog.streams.append(CatalogEntry( + stream=stream_name, + tap_stream_id=stream_name, + key_properties= key_properties, + schema=schema, + metadata=mdata + )) + + return catalog diff --git a/tap_freshdesk/schema.py b/tap_freshdesk/schema.py new file mode 100644 index 0000000..257150b --- /dev/null +++ b/tap_freshdesk/schema.py @@ -0,0 +1,49 @@ +import os +import json +from singer import metadata +import singer +from tap_freshdesk.streams import STREAMS + +def get_abs_path(path): + """ + Get the absolute path for the schema files. + """ + return os.path.join(os.path.dirname(os.path.realpath(__file__)), path) + +def get_schemas(): + """ + Load the schema references, prepare metadata for each stream and return schema and metadata for the catalog. 
+ """ + schemas = {} + field_metadata = {} + + refs = {} + for stream_name, stream_metadata in STREAMS.items(): + schema_path = get_abs_path('schemas/{}.json'.format(stream_name)) + + with open(schema_path) as file: + schema = json.load(file) + + schemas[stream_name] = schema + schema = singer.resolve_schema_references(schema, refs) + + mdata = metadata.new() + mdata = metadata.get_standard_metadata( + schema=schema, + key_properties = (hasattr(stream_metadata, 'key_properties') or None) and stream_metadata.key_properties, + valid_replication_keys = (hasattr(stream_metadata, 'replication_keys') or None) and stream_metadata.replication_keys, + replication_method = (hasattr(stream_metadata, 'replication_method') or None) and stream_metadata.replication_method + ) + mdata = metadata.to_map(mdata) + + # Loop through all keys and make replication keys of automatic inclusion + for field_name in schema['properties'].keys(): + + replication_keys = (hasattr(stream_metadata, 'replication_keys') or None) and stream_metadata.replication_keys + if replication_keys and field_name in replication_keys: + mdata = metadata.write(mdata, ('properties', field_name), 'inclusion', 'automatic') + + mdata = metadata.to_list(mdata) + field_metadata[stream_name] = mdata + + return schemas, field_metadata diff --git a/tap_freshdesk/streams.py b/tap_freshdesk/streams.py new file mode 100644 index 0000000..a7b69cc --- /dev/null +++ b/tap_freshdesk/streams.py @@ -0,0 +1,123 @@ +from datetime import datetime +import singer +from singer import bookmarks + + +LOGGER = singer.get_logger() + + +def get_bookmark(state, stream_name, form_id, bookmark_key, start_date): + """ + Return bookmark value if available in the state otherwise return start date + """ + if form_id: + return bookmarks.get_bookmark(state, stream_name, form_id, {}).get(bookmark_key, start_date) + return bookmarks.get_bookmark(state, stream_name, bookmark_key, start_date) + +def get_min_bookmark(stream, selected_streams, bookmark, start_date, state, form_id, bookmark_key): + """ + Get the minimum bookmark from the parent and its corresponding child bookmarks. + """ + + stream_obj = STREAMS[stream]() + min_bookmark = bookmark + if stream in selected_streams: + min_bookmark = min(min_bookmark, get_bookmark(state, stream, form_id, bookmark_key, start_date)) + + for child in filter(lambda x: x in selected_streams, stream_obj.children): + min_bookmark = min(min_bookmark, get_min_bookmark(child, selected_streams, bookmark, start_date, state, form_id, bookmark_key)) + + return min_bookmark + +def get_schema(catalog, stream_id): + """ + Return catalog of the specified stream. + """ + stream_catalog = [cat for cat in catalog if cat['tap_stream_id'] == stream_id ][0] + return stream_catalog + +def write_bookmarks(stream, selected_streams, bookmark_value, state): + stream_obj = STREAMS[stream]() + # If the stream is selected, write the bookmark. + if stream in selected_streams: + singer.write_bookmark(state, stream_obj.tap_stream_id, stream_obj.replication_keys[0], bookmark_value) + +class Stream: + """ + Base class representing tap-freshdesk streams. 
+ """ + tap_stream_id = None + replication_method = None + replication_keys = None + key_properties = [] + endpoint = None + filter_param = False + children = [] + headers = {} + params = {} + parent = None + data_key = None + child_data_key = None + records_count = {} + + def add_fields_at_1st_level(self, record, additional_data={}): + pass + +class Agents(Stream): + tap_stream_id = 'agents' + key_properties = 'id' + replication_keys = 'updated_at' + replication_method = 'INCREMENTAL' + +class Companies(Stream): + tap_stream_id = 'companies' + key_properties = 'id' + replication_keys = 'updated_at' + replication_method = 'INCREMENTAL' + +class Conversations(Stream): + tap_stream_id = 'conversations' + key_properties = 'id' + replication_keys = 'updated_at' + replication_method = 'INCREMENTAL' + +class Groups(Stream): + tap_stream_id = 'groups' + key_properties = 'id' + replication_keys = 'updated_at' + replication_method = 'INCREMENTAL' + +class Roles(Stream): + tap_stream_id = 'roles' + key_properties = 'id' + replication_keys = 'updated_at' + replication_method = 'INCREMENTAL' + +class SatisfactionRatings(Stream): + tap_stream_id = 'satisfaction_ratings' + key_properties = 'id' + replication_keys = 'updated_at' + replication_method = 'INCREMENTAL' + +class Tickets(Stream): + tap_stream_id = 'tickets' + key_properties = 'id' + replication_keys = 'updated_at' + replication_method = 'INCREMENTAL' + +class TimeEntries(Stream): + tap_stream_id = 'time_entries' + key_properties = 'id' + replication_keys = 'updated_at' + replication_method = 'INCREMENTAL' + +STREAMS = { + "agents": Agents, + "companies": Companies, + "conversations": Conversations, + "groups": Groups, + "roles": Roles, + "satisfaction_ratings": SatisfactionRatings, + "tickets": Tickets, + "time_entries": TimeEntries +} diff --git a/tap_freshdesk/sync.py b/tap_freshdesk/sync.py new file mode 100644 index 0000000..b5d1e16 --- /dev/null +++ b/tap_freshdesk/sync.py @@ -0,0 +1,69 @@ +import singer +from tap_freshdesk.streams import STREAMS + +LOGGER = singer.get_logger() + +def write_schemas(stream_id, catalog, selected_streams): + """ + Write the schemas for each stream. + """ + stream_obj = STREAMS[stream_id]() + + if stream_id in selected_streams: + # Get catalog object for a particular stream. + stream = [cat for cat in catalog['streams'] if cat['tap_stream_id'] == stream_id ][0] + singer.write_schema(stream_id, stream['schema'], stream['key_properties']) + + for child in stream_obj.children: + write_schemas(child, catalog, selected_streams) + +def get_selected_streams(catalog): + ''' + Gets selected streams. Checks schema's 'selected' + first -- and then checks metadata, looking for an empty + breadcrumb and mdata with a 'selected' entry + ''' + selected_streams = [] + for stream in catalog['streams']: + stream_metadata = stream['metadata'] + for entry in stream_metadata: + # Stream metadata will have an empty breadcrumb + if not entry['breadcrumb'] and entry['metadata'].get('selected',None): + selected_streams.append(stream['tap_stream_id']) + return selected_streams + +def get_stream_to_sync(selected_streams): + """ + Get the streams for which the sync function should be called(the parent in case of selected child streams). 
+ """ + streams_to_sync = [] + for stream_name, stream_obj in STREAMS.items(): + if (stream_name in selected_streams) or any(child in selected_streams for child in stream_obj.children): + streams_to_sync.append(stream_name) + return streams_to_sync + +def sync(client, config, state, catalog): + """ + Sync selected streams. + """ + + # Get selected streams, make sure stream dependencies are met + selected_streams = get_selected_streams(catalog) + streams_to_sync = get_stream_to_sync(selected_streams) + LOGGER.info("Selected Streams: %s", selected_streams) + LOGGER.info("Syncing Streams: %s", streams_to_sync) + + # Initializing a dictionary to keep track of record count by streams + records_count = {stream:0 for stream in STREAMS.keys()} + + singer.write_state(state) + for stream in streams_to_sync: + stream_obj = STREAMS[stream]() + + write_schemas(stream, catalog, selected_streams) + + stream_obj.sync_obj(client, state, catalog['streams'], config["start_date"], + selected_streams, records_count) + + for stream_name, stream_count in records_count.items(): + LOGGER.info('%s: %d', stream_name, stream_count) From 72d89a6827da35d9ae49103882747900ae803eed Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Mon, 22 Aug 2022 08:44:40 +0000 Subject: [PATCH 02/32] added comments to the client --- tap_freshdesk/client.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tap_freshdesk/client.py b/tap_freshdesk/client.py index dec7b5b..03e7208 100644 --- a/tap_freshdesk/client.py +++ b/tap_freshdesk/client.py @@ -6,11 +6,13 @@ from tap_freshdesk import utils LOGGER = singer.get_logger() -DEFAULT_TIMEOUT = 300 BASE_URL = "https://{}.freshdesk.com" class FreshdeskClient: + """ + The client class is used for making REST calls to the Freshdesk API. + """ def __init__(self, config): self.config = config @@ -26,6 +28,9 @@ def __exit__(self, exception_type, exception_value, traceback): self.session.close() def check_access_token(self): + """ + Check if the access token is valid. + """ self.request(self.base_url+"/api/v2/roles", {"per_page": 1, "page": 1}) @backoff.on_exception(backoff.expo, @@ -35,6 +40,9 @@ def check_access_token(self): factor=2) @utils.ratelimit(1, 2) def request(self, url, params={}): + """ + Call rest API and return the response in case of status code 200. + """ headers = {} if 'user_agent' in self.config: headers['User-Agent'] = self.config['user_agent'] @@ -43,6 +51,7 @@ def request(self, url, params={}): LOGGER.info("GET {}".format(req.url)) response = self.session.send(req) + # Call the function again if the rate limit is exceeded if 'Retry-After' in response.headers: retry_after = int(response.headers['Retry-After']) LOGGER.info("Rate limit reached. 
Sleeping for {} seconds".format(retry_after)) @@ -52,5 +61,3 @@ def request(self, url, params={}): response.raise_for_status() return response.json() - - From 42305fdc17b8df50c6947411b8210e10e67a1ebb Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Mon, 22 Aug 2022 09:15:36 +0000 Subject: [PATCH 03/32] updated config.yml --- .circleci/config.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index fd0ac65..24b4069 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -20,6 +20,23 @@ jobs: command: | source /usr/local/share/virtualenvs/tap-tester/bin/activate stitch-validate-json tap_freshdesk/schemas/*.json + - run: + name: 'pylint' + command: | + source /usr/local/share/virtualenvs/tap-freshdesk/bin/activate + pylint tap_freshdesk --disable 'missing-module-docstring,missing-function-docstring,missing-class-docstring,line-too-long,invalid-name,too-many-lines,consider-using-f-string,too-many-arguments,too-many-locals' + - run: + name: 'Unit Tests' + command: | + source /usr/local/share/virtualenvs/tap-freshdesk/bin/activate + pip install nose coverage parameterized + nosetests --with-coverage --cover-erase --cover-package=tap_freshdesk --cover-html-dir=htmlcov tests/unittests + coverage html + when: always + - store_test_results: + path: test_output/report.xml + - store_artifacts: + path: htmlcov - run: name: 'Integration Tests' command: | From d1963de1ab4e07e5622308d133560fe2e6f55762 Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Tue, 23 Aug 2022 06:06:58 +0000 Subject: [PATCH 04/32] added sync for normal streams --- tap_freshdesk/streams.py | 73 ++++++++++++++++++++++++++++++++-------- tap_freshdesk/sync.py | 2 +- 2 files changed, 60 insertions(+), 15 deletions(-) diff --git a/tap_freshdesk/streams.py b/tap_freshdesk/streams.py index a7b69cc..9c3ef0f 100644 --- a/tap_freshdesk/streams.py +++ b/tap_freshdesk/streams.py @@ -6,15 +6,13 @@ LOGGER = singer.get_logger() -def get_bookmark(state, stream_name, form_id, bookmark_key, start_date): +def get_bookmark(state, stream_name, bookmark_key, start_date): """ Return bookmark value if available in the state otherwise return start date """ - if form_id: - return bookmarks.get_bookmark(state, stream_name, form_id, {}).get(bookmark_key, start_date) return bookmarks.get_bookmark(state, stream_name, bookmark_key, start_date) -def get_min_bookmark(stream, selected_streams, bookmark, start_date, state, form_id, bookmark_key): +def get_min_bookmark(stream, selected_streams, bookmark, start_date, state, bookmark_key): """ Get the minimum bookmark from the parent and its corresponding child bookmarks. 
""" @@ -22,10 +20,10 @@ def get_min_bookmark(stream, selected_streams, bookmark, start_date, state, form stream_obj = STREAMS[stream]() min_bookmark = bookmark if stream in selected_streams: - min_bookmark = min(min_bookmark, get_bookmark(state, stream, form_id, bookmark_key, start_date)) + min_bookmark = min(min_bookmark, get_bookmark(state, stream, bookmark_key, start_date)) for child in filter(lambda x: x in selected_streams, stream_obj.children): - min_bookmark = min(min_bookmark, get_min_bookmark(child, selected_streams, bookmark, start_date, state, form_id, bookmark_key)) + min_bookmark = min(min_bookmark, get_min_bookmark(child, selected_streams, bookmark, start_date, state, bookmark_key)) return min_bookmark @@ -59,57 +57,104 @@ class Stream: data_key = None child_data_key = None records_count = {} + force_str = False def add_fields_at_1st_level(self, record, additional_data={}): pass + def transform_dict(self, d, key_key="name", value_key="value", force_str=False): + # Custom fields are expected to be strings, but sometimes the API sends + # booleans. We cast those to strings to match the schema. + rtn = [] + for k, v in d.items(): + if force_str: + v = str(v).lower() + rtn.append({key_key: k, value_key: v}) + return rtn + + def build_url(self, base_url, *args): + return base_url + '/api/v2/'+ self.path.format(*args) + + def sync_obj(self, state, start_date, client, catalog, selected_streams, records_count): + stream_catalog = get_schema(catalog, self.tap_stream_id) + bookmark = get_bookmark(state, self.tap_stream_id, self.replication_keys[0], start_date) + max_bookmark = bookmark + full_url = self.build_url(client.base_url) + + LOGGER.info("Syncing {} from {}".format(self.tap_stream_id, bookmark)) + with singer.metrics.record_counter(self.tap_stream_id) as counter: + with singer.Transformer() as transformer: + extraction_time = singer.utils.now() + stream_metadata = singer.metadata.to_map(stream_catalog['metadata']) + for row in client.request(full_url): + if row[self.replication_keys[0]] >= bookmark: + if 'custom_fields' in row: + row['custom_fields'] = self.transform_dict(row['custom_fields'], force_str=self.force_str) + + rec = transformer.transform(row, stream_catalog['schema'], stream_metadata) + singer.write_record(self.tap_stream_id, rec, time_extracted=extraction_time) + max_bookmark = max(max_bookmark, rec[self.replication_keys[0]]) + counter.increment(1) + singer.write_bookmark(state, self.tap_stream_id, self.replication_keys[0], max_bookmark) + + singer.write_state(state) + + class Agents(Stream): tap_stream_id = 'agents' key_properties = 'id' - replication_keys = 'updated_at' + replication_keys = ['updated_at'] replication_method = 'INCREMENTAL' + path = 'agents' class Companies(Stream): tap_stream_id = 'companies' key_properties = 'id' - replication_keys = 'updated_at' + replication_keys = ['updated_at'] replication_method = 'INCREMENTAL' + path = 'companies' class Conversations(Stream): tap_stream_id = 'conversations' key_properties = 'id' - replication_keys = 'updated_at' + replication_keys = ['updated_at'] replication_method = 'INCREMENTAL' + path = 'tickets/{}/conversations' class Groups(Stream): tap_stream_id = 'groups' key_properties = 'id' - replication_keys = 'updated_at' + replication_keys = ['updated_at'] replication_method = 'INCREMENTAL' + path = 'groups' class Roles(Stream): tap_stream_id = 'roles' key_properties = 'id' - replication_keys = 'updated_at' + replication_keys = ['updated_at'] replication_method = 'INCREMENTAL' + path = 'roles' class 
SatisfactionRatings(Stream): tap_stream_id = 'satisfaction_ratings' key_properties = 'id' - replication_keys = 'updated_at' + replication_keys = ['updated_at'] replication_method = 'INCREMENTAL' + path = 'tickets/{}/satisfaction_ratings' class Tickets(Stream): tap_stream_id = 'tickets' key_properties = 'id' - replication_keys = 'updated_at' + replication_keys = ['updated_at'] replication_method = 'INCREMENTAL' + path = 'tickets' class TimeEntries(Stream): tap_stream_id = 'time_entries' key_properties = 'id' - replication_keys = 'updated_at' + replication_keys = ['updated_at'] replication_method = 'INCREMENTAL' + path = 'tickets/{}/time_entries' STREAMS = { "agents": Agents, diff --git a/tap_freshdesk/sync.py b/tap_freshdesk/sync.py index b5d1e16..fa94558 100644 --- a/tap_freshdesk/sync.py +++ b/tap_freshdesk/sync.py @@ -62,7 +62,7 @@ def sync(client, config, state, catalog): write_schemas(stream, catalog, selected_streams) - stream_obj.sync_obj(client, state, catalog['streams'], config["start_date"], + stream_obj.sync_obj(state, config["start_date"], client, catalog['streams'], selected_streams, records_count) for stream_name, stream_count in records_count.items(): From a254e6dddaa21cac514b8f8596a5c51f2914f804 Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Tue, 23 Aug 2022 07:33:49 +0000 Subject: [PATCH 05/32] added pagination --- tap_freshdesk/streams.py | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/tap_freshdesk/streams.py b/tap_freshdesk/streams.py index 9c3ef0f..547cc05 100644 --- a/tap_freshdesk/streams.py +++ b/tap_freshdesk/streams.py @@ -4,6 +4,7 @@ LOGGER = singer.get_logger() +PAGE_SIZE = 100 def get_bookmark(state, stream_name, bookmark_key, start_date): @@ -52,7 +53,8 @@ class Stream: filter_param = False children = [] headers = {} - params = {} + params = {"per_page": PAGE_SIZE, "page": 1} + paginate = True parent = None data_key = None child_data_key = None @@ -82,20 +84,24 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, records full_url = self.build_url(client.base_url) LOGGER.info("Syncing {} from {}".format(self.tap_stream_id, bookmark)) - with singer.metrics.record_counter(self.tap_stream_id) as counter: - with singer.Transformer() as transformer: - extraction_time = singer.utils.now() - stream_metadata = singer.metadata.to_map(stream_catalog['metadata']) - for row in client.request(full_url): - if row[self.replication_keys[0]] >= bookmark: - if 'custom_fields' in row: - row['custom_fields'] = self.transform_dict(row['custom_fields'], force_str=self.force_str) - - rec = transformer.transform(row, stream_catalog['schema'], stream_metadata) - singer.write_record(self.tap_stream_id, rec, time_extracted=extraction_time) - max_bookmark = max(max_bookmark, rec[self.replication_keys[0]]) - counter.increment(1) - singer.write_bookmark(state, self.tap_stream_id, self.replication_keys[0], max_bookmark) + while self.paginate: + with singer.metrics.record_counter(self.tap_stream_id) as counter: + with singer.Transformer() as transformer: + extraction_time = singer.utils.now() + stream_metadata = singer.metadata.to_map(stream_catalog['metadata']) + data = client.request(full_url, self.params) + self.paginate = len(data) >= PAGE_SIZE + self.params['page'] += 1 + for row in data: + if row[self.replication_keys[0]] >= bookmark: + if 'custom_fields' in row: + row['custom_fields'] = self.transform_dict(row['custom_fields'], force_str=self.force_str) + + rec = transformer.transform(row, 
stream_catalog['schema'], stream_metadata) + singer.write_record(self.tap_stream_id, rec, time_extracted=extraction_time) + max_bookmark = max(max_bookmark, rec[self.replication_keys[0]]) + counter.increment(1) + singer.write_bookmark(state, self.tap_stream_id, self.replication_keys[0], max_bookmark) singer.write_state(state) From 7bce692a6af15cc9f87200a3240c6f8e7171e2aa Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Tue, 23 Aug 2022 12:50:19 +0000 Subject: [PATCH 06/32] added parent child --- tap_freshdesk/streams.py | 88 +++++++++++++++++++++++++++++++--------- 1 file changed, 69 insertions(+), 19 deletions(-) diff --git a/tap_freshdesk/streams.py b/tap_freshdesk/streams.py index 547cc05..36c4722 100644 --- a/tap_freshdesk/streams.py +++ b/tap_freshdesk/streams.py @@ -60,6 +60,7 @@ class Stream: child_data_key = None records_count = {} force_str = False + date_filter = False def add_fields_at_1st_level(self, record, additional_data={}): pass @@ -77,32 +78,64 @@ def transform_dict(self, d, key_key="name", value_key="value", force_str=False): def build_url(self, base_url, *args): return base_url + '/api/v2/'+ self.path.format(*args) - def sync_obj(self, state, start_date, client, catalog, selected_streams, records_count): + def sync_child_stream(self, parent_id, catalogs, state, selected_stream_ids, start_date, max_bookmark, client): + + for child in self.children: + child_obj = STREAMS[child]() + child_bookmark = get_bookmark(state, child_obj.tap_stream_id, self.replication_keys[0], start_date) + + if child in selected_stream_ids: + child_catalog = get_schema(catalogs, child) + full_url = self.build_url(client.base_url, parent_id) + data = client.request(full_url, self.params) + max_bookmark = self.write_records(child_catalog, state, selected_stream_ids, start_date, child_obj.tap_stream_id, data) + # self.records_count[child_obj.tap_stream_id] += len(record[self.child_data_key]) + # max_bookmark = max(max_bookmark, record[child_obj.replication_keys[0]]) + return max_bookmark + + def write_records(self, catalog, state, selected_streams, start_date, data, max_bookmark, client): stream_catalog = get_schema(catalog, self.tap_stream_id) bookmark = get_bookmark(state, self.tap_stream_id, self.replication_keys[0], start_date) + + with singer.metrics.record_counter(self.tap_stream_id) as counter: + with singer.Transformer() as transformer: + extraction_time = singer.utils.now() + stream_metadata = singer.metadata.to_map(stream_catalog['metadata']) + for row in data: + if row[self.replication_keys[0]] >= bookmark: + if 'custom_fields' in row: + row['custom_fields'] = self.transform_dict(row['custom_fields'], force_str=self.force_str) + + rec = transformer.transform(row, stream_catalog['schema'], stream_metadata) + singer.write_record(self.tap_stream_id, rec, time_extracted=extraction_time) + max_bookmark = max(max_bookmark, rec[self.replication_keys[0]]) + counter.increment(1) + + # Write selected child records + if self.children and self.child_data_key in row: + max_bookmark = self.sync_child_stream(row[self.child_data_key], catalog, state, selected_streams, start_date, max_bookmark, client) + return max_bookmark + + def sync_obj(self, state, start_date, client, catalog, selected_streams, streams_to_sync, predefined_filter=None): + bookmark = get_bookmark(state, self.tap_stream_id, self.replication_keys[0], start_date) max_bookmark = bookmark full_url = self.build_url(client.base_url) + if predefined_filter: + LOGGER.info("Syncing tickets with filter {}".format(predefined_filter)) + 
self.params['filter'] = predefined_filter + + if self.date_filter: + self.params['updated_since'] = bookmark + self.params['page'] = 1 LOGGER.info("Syncing {} from {}".format(self.tap_stream_id, bookmark)) while self.paginate: - with singer.metrics.record_counter(self.tap_stream_id) as counter: - with singer.Transformer() as transformer: - extraction_time = singer.utils.now() - stream_metadata = singer.metadata.to_map(stream_catalog['metadata']) - data = client.request(full_url, self.params) - self.paginate = len(data) >= PAGE_SIZE - self.params['page'] += 1 - for row in data: - if row[self.replication_keys[0]] >= bookmark: - if 'custom_fields' in row: - row['custom_fields'] = self.transform_dict(row['custom_fields'], force_str=self.force_str) - - rec = transformer.transform(row, stream_catalog['schema'], stream_metadata) - singer.write_record(self.tap_stream_id, rec, time_extracted=extraction_time) - max_bookmark = max(max_bookmark, rec[self.replication_keys[0]]) - counter.increment(1) - singer.write_bookmark(state, self.tap_stream_id, self.replication_keys[0], max_bookmark) - + data = client.request(full_url, self.params) + self.paginate = len(data) >= PAGE_SIZE + self.params['page'] += 1 + max_bookmark = self.write_records(catalog, state, selected_streams, + start_date, data, max_bookmark, client) + write_bookmarks(self.tap_stream_id, selected_streams, max_bookmark, state) singer.write_state(state) @@ -126,6 +159,7 @@ class Conversations(Stream): replication_keys = ['updated_at'] replication_method = 'INCREMENTAL' path = 'tickets/{}/conversations' + parent = 'tickets' class Groups(Stream): tap_stream_id = 'groups' @@ -147,6 +181,8 @@ class SatisfactionRatings(Stream): replication_keys = ['updated_at'] replication_method = 'INCREMENTAL' path = 'tickets/{}/satisfaction_ratings' + parent = 'tickets' + date_filter = True class Tickets(Stream): tap_stream_id = 'tickets' @@ -154,6 +190,19 @@ class Tickets(Stream): replication_keys = ['updated_at'] replication_method = 'INCREMENTAL' path = 'tickets' + children = ['conversations', 'satisfaction_ratings', 'time_entries'] + child_data_key = 'id' + params = { + "per_page": PAGE_SIZE, + 'order_by': replication_keys[0], + 'order_type': "asc", + 'include': "requester,company,stats" + } + + def sync_obj(self, state, start_date, client, catalog, selected_streams, streams_to_sync, predefined_filter=None): + super().sync_obj(state, start_date, client, catalog, selected_streams, streams_to_sync) + super().sync_obj(state, start_date, client, catalog, selected_streams, streams_to_sync, 'deleted') + super().sync_obj(state, start_date, client, catalog, selected_streams, streams_to_sync, 'spam') class TimeEntries(Stream): tap_stream_id = 'time_entries' @@ -161,6 +210,7 @@ class TimeEntries(Stream): replication_keys = ['updated_at'] replication_method = 'INCREMENTAL' path = 'tickets/{}/time_entries' + parent = 'tickets' STREAMS = { "agents": Agents, From d82d65b710425ccd437c9685801ec0865d262e59 Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Tue, 23 Aug 2022 13:42:09 +0000 Subject: [PATCH 07/32] updated the parent child code --- tap_freshdesk/streams.py | 26 +++++++++----------------- tap_freshdesk/sync.py | 2 +- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/tap_freshdesk/streams.py b/tap_freshdesk/streams.py index 36c4722..2a3382d 100644 --- a/tap_freshdesk/streams.py +++ b/tap_freshdesk/streams.py @@ -61,9 +61,7 @@ class Stream: records_count = {} force_str = False date_filter = False - - def add_fields_at_1st_level(self, record, 
additional_data={}): - pass + parent_id = None def transform_dict(self, d, key_key="name", value_key="value", force_str=False): # Custom fields are expected to be strings, but sometimes the API sends @@ -78,22 +76,17 @@ def transform_dict(self, d, key_key="name", value_key="value", force_str=False): def build_url(self, base_url, *args): return base_url + '/api/v2/'+ self.path.format(*args) - def sync_child_stream(self, parent_id, catalogs, state, selected_stream_ids, start_date, max_bookmark, client): + def sync_child_stream(self, parent_id, catalog, state, selected_stream_ids, start_date, max_bookmark, client, streams_to_sync): for child in self.children: child_obj = STREAMS[child]() - child_bookmark = get_bookmark(state, child_obj.tap_stream_id, self.replication_keys[0], start_date) if child in selected_stream_ids: - child_catalog = get_schema(catalogs, child) - full_url = self.build_url(client.base_url, parent_id) - data = client.request(full_url, self.params) - max_bookmark = self.write_records(child_catalog, state, selected_stream_ids, start_date, child_obj.tap_stream_id, data) - # self.records_count[child_obj.tap_stream_id] += len(record[self.child_data_key]) - # max_bookmark = max(max_bookmark, record[child_obj.replication_keys[0]]) + child_obj.parent_id = parent_id + child_obj.sync_obj(state, start_date, client, catalog, selected_stream_ids, streams_to_sync) return max_bookmark - def write_records(self, catalog, state, selected_streams, start_date, data, max_bookmark, client): + def write_records(self, catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync): stream_catalog = get_schema(catalog, self.tap_stream_id) bookmark = get_bookmark(state, self.tap_stream_id, self.replication_keys[0], start_date) @@ -102,7 +95,7 @@ def write_records(self, catalog, state, selected_streams, start_date, data, max_ extraction_time = singer.utils.now() stream_metadata = singer.metadata.to_map(stream_catalog['metadata']) for row in data: - if row[self.replication_keys[0]] >= bookmark: + if self.tap_stream_id in selected_streams and row[self.replication_keys[0]] >= bookmark: if 'custom_fields' in row: row['custom_fields'] = self.transform_dict(row['custom_fields'], force_str=self.force_str) @@ -113,13 +106,13 @@ def write_records(self, catalog, state, selected_streams, start_date, data, max_ # Write selected child records if self.children and self.child_data_key in row: - max_bookmark = self.sync_child_stream(row[self.child_data_key], catalog, state, selected_streams, start_date, max_bookmark, client) + max_bookmark = self.sync_child_stream(row[self.child_data_key], catalog, state, selected_streams, start_date, max_bookmark, client, streams_to_sync) return max_bookmark def sync_obj(self, state, start_date, client, catalog, selected_streams, streams_to_sync, predefined_filter=None): bookmark = get_bookmark(state, self.tap_stream_id, self.replication_keys[0], start_date) max_bookmark = bookmark - full_url = self.build_url(client.base_url) + full_url = self.build_url(client.base_url, self.parent_id) if predefined_filter: LOGGER.info("Syncing tickets with filter {}".format(predefined_filter)) self.params['filter'] = predefined_filter @@ -133,8 +126,7 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams data = client.request(full_url, self.params) self.paginate = len(data) >= PAGE_SIZE self.params['page'] += 1 - max_bookmark = self.write_records(catalog, state, selected_streams, - start_date, data, max_bookmark, client) + max_bookmark = 
self.write_records(catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync) write_bookmarks(self.tap_stream_id, selected_streams, max_bookmark, state) singer.write_state(state) diff --git a/tap_freshdesk/sync.py b/tap_freshdesk/sync.py index fa94558..5503bbc 100644 --- a/tap_freshdesk/sync.py +++ b/tap_freshdesk/sync.py @@ -57,7 +57,7 @@ def sync(client, config, state, catalog): records_count = {stream:0 for stream in STREAMS.keys()} singer.write_state(state) - for stream in streams_to_sync: + for stream in filter(lambda x: STREAMS[x]().parent is None, streams_to_sync): stream_obj = STREAMS[stream]() write_schemas(stream, catalog, selected_streams) From 8c197ac154bf818e3e4def10daa20d96b48c8201 Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Wed, 24 Aug 2022 13:02:53 +0000 Subject: [PATCH 08/32] updated parent child code --- tap_freshdesk/__init__.py | 16 ++--- tap_freshdesk/streams.py | 148 ++++++++++++++++++++++++-------------- 2 files changed, 101 insertions(+), 63 deletions(-) diff --git a/tap_freshdesk/__init__.py b/tap_freshdesk/__init__.py index e1956c3..2413526 100644 --- a/tap_freshdesk/__init__.py +++ b/tap_freshdesk/__init__.py @@ -14,14 +14,14 @@ def main(): args = utils.parse_args(REQUIRED_CONFIG_KEYS) config = args.config - client = FreshdeskClient(config) - if args.discover: - catalog = _discover() - catalog.dump() - else: - catalog = args.catalog \ - if args.catalog else _discover() - _sync(client, config, args.state, catalog.to_dict()) + with FreshdeskClient(config) as client: + if args.discover: + catalog = _discover() + catalog.dump() + else: + catalog = args.catalog \ + if args.catalog else _discover() + _sync(client, config, args.state, catalog.to_dict()) if __name__ == "__main__": main() diff --git a/tap_freshdesk/streams.py b/tap_freshdesk/streams.py index 2a3382d..6203d48 100644 --- a/tap_freshdesk/streams.py +++ b/tap_freshdesk/streams.py @@ -1,30 +1,28 @@ -from datetime import datetime +import copy +from datetime import datetime as dt import singer -from singer import bookmarks +from singer.bookmarks import get_bookmark LOGGER = singer.get_logger() PAGE_SIZE = 100 +DATETIME_FMT = "%Y-%m-%dT%H:%M:%SZ" -def get_bookmark(state, stream_name, bookmark_key, start_date): - """ - Return bookmark value if available in the state otherwise return start date - """ - return bookmarks.get_bookmark(state, stream_name, bookmark_key, start_date) - -def get_min_bookmark(stream, selected_streams, bookmark, start_date, state, bookmark_key): +def get_min_bookmark(stream, streams_to_sync, start_date, state, bookmark_key, predefined_filter=None): """ Get the minimum bookmark from the parent and its corresponding child bookmarks. 
""" stream_obj = STREAMS[stream]() - min_bookmark = bookmark - if stream in selected_streams: + min_bookmark = dt.strftime(dt.now(), DATETIME_FMT) + if stream in streams_to_sync: + if predefined_filter: + stream = stream + '_' + predefined_filter min_bookmark = min(min_bookmark, get_bookmark(state, stream, bookmark_key, start_date)) - for child in filter(lambda x: x in selected_streams, stream_obj.children): - min_bookmark = min(min_bookmark, get_min_bookmark(child, selected_streams, bookmark, start_date, state, bookmark_key)) + for child in filter(lambda x: x in streams_to_sync, stream_obj.children): + min_bookmark = min(min_bookmark, get_min_bookmark(child, streams_to_sync, start_date, state, bookmark_key)) return min_bookmark @@ -35,11 +33,14 @@ def get_schema(catalog, stream_id): stream_catalog = [cat for cat in catalog if cat['tap_stream_id'] == stream_id ][0] return stream_catalog -def write_bookmarks(stream, selected_streams, bookmark_value, state): +def write_bookmark(stream, selected_streams, bookmark_value, state, predefined_filter=None): + """If the stream is selected, write the bookmark""" stream_obj = STREAMS[stream]() - # If the stream is selected, write the bookmark. + stream_id = stream_obj.tap_stream_id if stream in selected_streams: - singer.write_bookmark(state, stream_obj.tap_stream_id, stream_obj.replication_keys[0], bookmark_value) + if predefined_filter: + stream_id = stream_id + '_' + predefined_filter + singer.write_bookmark(state, stream_id, stream_obj.replication_keys[0], bookmark_value) class Stream: """ @@ -56,8 +57,7 @@ class Stream: params = {"per_page": PAGE_SIZE, "page": 1} paginate = True parent = None - data_key = None - child_data_key = None + id_key = None records_count = {} force_str = False date_filter = False @@ -86,9 +86,13 @@ def sync_child_stream(self, parent_id, catalog, state, selected_stream_ids, star child_obj.sync_obj(state, start_date, client, catalog, selected_stream_ids, streams_to_sync) return max_bookmark - def write_records(self, catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync): + def write_records(self, catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, predefined_filter=None): stream_catalog = get_schema(catalog, self.tap_stream_id) - bookmark = get_bookmark(state, self.tap_stream_id, self.replication_keys[0], start_date) + stream_id = self.tap_stream_id + if predefined_filter: + self.params['filter'] = predefined_filter + stream_id = stream_id + '_' + predefined_filter + bookmark = get_bookmark(state, stream_id, self.replication_keys[0], start_date) with singer.metrics.record_counter(self.tap_stream_id) as counter: with singer.Transformer() as transformer: @@ -105,85 +109,72 @@ def write_records(self, catalog, state, selected_streams, start_date, data, max_ counter.increment(1) # Write selected child records - if self.children and self.child_data_key in row: - max_bookmark = self.sync_child_stream(row[self.child_data_key], catalog, state, selected_streams, start_date, max_bookmark, client, streams_to_sync) + if self.children and self.id_key in row: + max_bookmark = self.sync_child_stream(row[self.id_key], catalog, state, selected_streams, start_date, max_bookmark, client, streams_to_sync) return max_bookmark def sync_obj(self, state, start_date, client, catalog, selected_streams, streams_to_sync, predefined_filter=None): - bookmark = get_bookmark(state, self.tap_stream_id, self.replication_keys[0], start_date) - max_bookmark = bookmark full_url = 
self.build_url(client.base_url, self.parent_id) if predefined_filter: LOGGER.info("Syncing tickets with filter {}".format(predefined_filter)) self.params['filter'] = predefined_filter + min_bookmark = get_min_bookmark(self.tap_stream_id, streams_to_sync, start_date, state, self.replication_keys[0], predefined_filter) + max_bookmark = min_bookmark if self.date_filter: - self.params['updated_since'] = bookmark + self.params['updated_since'] = min_bookmark self.params['page'] = 1 + self.paginate = True - LOGGER.info("Syncing {} from {}".format(self.tap_stream_id, bookmark)) + LOGGER.info("Syncing {} from {}".format(self.tap_stream_id, min_bookmark)) while self.paginate: data = client.request(full_url, self.params) self.paginate = len(data) >= PAGE_SIZE self.params['page'] += 1 - max_bookmark = self.write_records(catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync) - write_bookmarks(self.tap_stream_id, selected_streams, max_bookmark, state) + max_bookmark = self.write_records(catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, predefined_filter) + + write_bookmark(self.tap_stream_id, selected_streams, max_bookmark, state, predefined_filter) singer.write_state(state) class Agents(Stream): tap_stream_id = 'agents' - key_properties = 'id' + key_properties = ['id'] replication_keys = ['updated_at'] replication_method = 'INCREMENTAL' path = 'agents' class Companies(Stream): tap_stream_id = 'companies' - key_properties = 'id' + key_properties = ['id'] replication_keys = ['updated_at'] replication_method = 'INCREMENTAL' path = 'companies' -class Conversations(Stream): - tap_stream_id = 'conversations' - key_properties = 'id' - replication_keys = ['updated_at'] - replication_method = 'INCREMENTAL' - path = 'tickets/{}/conversations' - parent = 'tickets' - class Groups(Stream): tap_stream_id = 'groups' - key_properties = 'id' + key_properties = ['id'] replication_keys = ['updated_at'] replication_method = 'INCREMENTAL' path = 'groups' class Roles(Stream): tap_stream_id = 'roles' - key_properties = 'id' + key_properties = ['id'] replication_keys = ['updated_at'] replication_method = 'INCREMENTAL' path = 'roles' -class SatisfactionRatings(Stream): - tap_stream_id = 'satisfaction_ratings' - key_properties = 'id' - replication_keys = ['updated_at'] - replication_method = 'INCREMENTAL' - path = 'tickets/{}/satisfaction_ratings' - parent = 'tickets' - date_filter = True class Tickets(Stream): tap_stream_id = 'tickets' - key_properties = 'id' + key_properties = ['id'] replication_keys = ['updated_at'] replication_method = 'INCREMENTAL' path = 'tickets' children = ['conversations', 'satisfaction_ratings', 'time_entries'] - child_data_key = 'id' + id_key = 'id' + date_filter = True params = { "per_page": PAGE_SIZE, 'order_by': replication_keys[0], @@ -192,18 +183,65 @@ class Tickets(Stream): } def sync_obj(self, state, start_date, client, catalog, selected_streams, streams_to_sync, predefined_filter=None): - super().sync_obj(state, start_date, client, catalog, selected_streams, streams_to_sync) - super().sync_obj(state, start_date, client, catalog, selected_streams, streams_to_sync, 'deleted') - super().sync_obj(state, start_date, client, catalog, selected_streams, streams_to_sync, 'spam') + max_states = [] + for each_filter in [None, 'deleted', 'spam']: + dup_state = copy.deepcopy(state) + super().sync_obj(dup_state, start_date, client, catalog, selected_streams, streams_to_sync, each_filter) + max_states.append(dup_state) + + 
stream_list = set() + for st in max_states: + stream_list = stream_list.union(set(st.get("bookmarks", {}).keys())) + for stream in stream_list: + singer.write_bookmark(state, stream, "updated_at", max(map(get_bookmark(state, stream, "updated_at", start_date), max_states))) + + +class ChildStream(Stream): + + def sync_obj(self, state, start_date, client, catalog, selected_streams, streams_to_sync, predefined_filter=None): + full_url = self.build_url(client.base_url, self.parent_id) + min_bookmark = get_min_bookmark(self.tap_stream_id, streams_to_sync, start_date, state, self.replication_keys[0], predefined_filter) + max_bookmark = min_bookmark + self.params['page'] = 1 + self.paginate = True -class TimeEntries(Stream): + LOGGER.info("Syncing {} from {}".format(self.tap_stream_id, min_bookmark)) + while self.paginate: + data = client.request(full_url, self.params) + self.paginate = len(data) >= PAGE_SIZE + self.params['page'] += 1 + max_bookmark = self.write_records(catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, predefined_filter) + + write_bookmark(self.tap_stream_id, selected_streams, max_bookmark, state, predefined_filter) + singer.write_state(state) + +class Conversations(ChildStream): + tap_stream_id = 'conversations' + key_properties = ['id'] + replication_keys = ['updated_at'] + replication_method = 'INCREMENTAL' + path = 'tickets/{}/conversations' + parent = 'tickets' + + +class SatisfactionRatings(ChildStream): + tap_stream_id = 'satisfaction_ratings' + key_properties = ['id'] + replication_keys = ['updated_at'] + replication_method = 'INCREMENTAL' + path = 'tickets/{}/satisfaction_ratings' + parent = 'tickets' + date_filter = True + +class TimeEntries(ChildStream): tap_stream_id = 'time_entries' - key_properties = 'id' + key_properties = ['id'] replication_keys = ['updated_at'] replication_method = 'INCREMENTAL' path = 'tickets/{}/time_entries' parent = 'tickets' + STREAMS = { "agents": Agents, "companies": Companies, From 314d916b43ba55a660463e536457bae81573f636 Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Thu, 25 Aug 2022 05:29:21 +0000 Subject: [PATCH 09/32] added currently syncing and updated bookmark logic --- tap_freshdesk/streams.py | 28 ++++++++++++++++------------ tap_freshdesk/sync.py | 32 ++++++++++++++++++++++++++++---- 2 files changed, 44 insertions(+), 16 deletions(-) diff --git a/tap_freshdesk/streams.py b/tap_freshdesk/streams.py index 6203d48..8e2d715 100644 --- a/tap_freshdesk/streams.py +++ b/tap_freshdesk/streams.py @@ -132,9 +132,9 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams self.paginate = len(data) >= PAGE_SIZE self.params['page'] += 1 max_bookmark = self.write_records(catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, predefined_filter) - write_bookmark(self.tap_stream_id, selected_streams, max_bookmark, state, predefined_filter) - singer.write_state(state) + + return state class Agents(Stream): @@ -183,18 +183,22 @@ class Tickets(Stream): } def sync_obj(self, state, start_date, client, catalog, selected_streams, streams_to_sync, predefined_filter=None): - max_states = [] + dup_state = copy.deepcopy(state) + max_child_bms = {} for each_filter in [None, 'deleted', 'spam']: - dup_state = copy.deepcopy(state) - super().sync_obj(dup_state, start_date, client, catalog, selected_streams, streams_to_sync, each_filter) - max_states.append(dup_state) + # Update child bookmark to original_state + for child in filter(lambda s: s in 
selected_streams, self.children): + state = singer.write_bookmark(state, child, "updated_at", get_bookmark(dup_state, child, "updated_at", start_date)) - stream_list = set() - for st in max_states: - stream_list = stream_list.union(set(st.get("bookmarks", {}).keys())) - for stream in stream_list: - singer.write_bookmark(state, stream, "updated_at", max(map(get_bookmark(state, stream, "updated_at", start_date), max_states))) + super().sync_obj(state, start_date, client, catalog, selected_streams, streams_to_sync, each_filter) + max_child_bms.update({child: max(max_child_bms.get(child, ""), get_bookmark(state, child, "updated_at", start_date)) + for child in self.children + if child in selected_streams}) + + for child, bm in max_child_bms.items(): + singer.write_bookmark(state, child, "updated_at", bm) + return state class ChildStream(Stream): @@ -213,7 +217,7 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams max_bookmark = self.write_records(catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, predefined_filter) write_bookmark(self.tap_stream_id, selected_streams, max_bookmark, state, predefined_filter) - singer.write_state(state) + return state class Conversations(ChildStream): tap_stream_id = 'conversations' diff --git a/tap_freshdesk/sync.py b/tap_freshdesk/sync.py index 5503bbc..b16503d 100644 --- a/tap_freshdesk/sync.py +++ b/tap_freshdesk/sync.py @@ -32,6 +32,26 @@ def get_selected_streams(catalog): selected_streams.append(stream['tap_stream_id']) return selected_streams +def update_currently_syncing(state, stream_name): + """ + Updates currently syncing stream in the state. + """ + if not stream_name and singer.get_currently_syncing(state): + del state['currently_syncing'] + else: + singer.set_currently_syncing(state, stream_name) + singer.write_state(state) + +def get_ordered_stream_list(currently_syncing, streams_to_sync): + """ + Get an ordered list of remaining streams to sync other streams followed by synced streams. + """ + stream_list = list(sorted(streams_to_sync)) + if currently_syncing in stream_list: + index = stream_list.index(currently_syncing) + stream_list = stream_list[index:] + stream_list[:index] + return stream_list + def get_stream_to_sync(selected_streams): """ Get the streams for which the sync function should be called(the parent in case of selected child streams). 
@@ -57,13 +77,17 @@ def sync(client, config, state, catalog): records_count = {stream:0 for stream in STREAMS.keys()} singer.write_state(state) + currently_syncing = singer.get_currently_syncing(state) + streams_to_sync = get_ordered_stream_list(currently_syncing, streams_to_sync) for stream in filter(lambda x: STREAMS[x]().parent is None, streams_to_sync): stream_obj = STREAMS[stream]() write_schemas(stream, catalog, selected_streams) + update_currently_syncing(state, stream) - stream_obj.sync_obj(state, config["start_date"], client, catalog['streams'], - selected_streams, records_count) + state = stream_obj.sync_obj(state, config["start_date"], client, catalog['streams'], + selected_streams, streams_to_sync) + singer.write_state(state) - for stream_name, stream_count in records_count.items(): - LOGGER.info('%s: %d', stream_name, stream_count) + # for stream_name, stream_count in records_count.items(): + # LOGGER.info('%s: %d', stream_name, stream_count) From 0785754172ff3a06544fdfdd8c35106969066bc4 Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Thu, 25 Aug 2022 06:26:06 +0000 Subject: [PATCH 10/32] updated child bookmarking logic --- tap_freshdesk/streams.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/tap_freshdesk/streams.py b/tap_freshdesk/streams.py index 8e2d715..3560217 100644 --- a/tap_freshdesk/streams.py +++ b/tap_freshdesk/streams.py @@ -93,6 +93,8 @@ def write_records(self, catalog, state, selected_streams, start_date, data, max_ self.params['filter'] = predefined_filter stream_id = stream_id + '_' + predefined_filter bookmark = get_bookmark(state, stream_id, self.replication_keys[0], start_date) + child_max_bookmark = None + child_max_bookmarks = {} with singer.metrics.record_counter(self.tap_stream_id) as counter: with singer.Transformer() as transformer: @@ -109,9 +111,14 @@ def write_records(self, catalog, state, selected_streams, start_date, data, max_ counter.increment(1) # Write selected child records - if self.children and self.id_key in row: - max_bookmark = self.sync_child_stream(row[self.id_key], catalog, state, selected_streams, start_date, max_bookmark, client, streams_to_sync) - return max_bookmark + for child in self.children: + child_obj = STREAMS[child]() + child_max_bookmark = get_bookmark(state, child_obj.tap_stream_id, child_obj.replication_keys[0], start_date) + if child in selected_streams: + child_obj.parent_id = row['id'] + child_max_bookmark = max(child_max_bookmark, child_obj.sync_obj(state, start_date, client, catalog, selected_streams, streams_to_sync)) + child_max_bookmarks[child] = child_max_bookmark + return max_bookmark, child_max_bookmarks def sync_obj(self, state, start_date, client, catalog, selected_streams, streams_to_sync, predefined_filter=None): full_url = self.build_url(client.base_url, self.parent_id) @@ -131,9 +138,11 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams data = client.request(full_url, self.params) self.paginate = len(data) >= PAGE_SIZE self.params['page'] += 1 - max_bookmark = self.write_records(catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, predefined_filter) - write_bookmark(self.tap_stream_id, selected_streams, max_bookmark, state, predefined_filter) + max_bookmark, child_max_bookmarks = self.write_records(catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, predefined_filter) + write_bookmark(self.tap_stream_id, selected_streams, max_bookmark, 
state, predefined_filter) + for key, value in child_max_bookmarks.items(): + write_bookmark(key, selected_streams, value, state, None) return state @@ -188,9 +197,9 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams for each_filter in [None, 'deleted', 'spam']: # Update child bookmark to original_state for child in filter(lambda s: s in selected_streams, self.children): - state = singer.write_bookmark(state, child, "updated_at", get_bookmark(dup_state, child, "updated_at", start_date)) + singer.write_bookmark(state, child, "updated_at", get_bookmark(dup_state, child, "updated_at", start_date)) - super().sync_obj(state, start_date, client, catalog, selected_streams, streams_to_sync, each_filter) + state = super().sync_obj(state, start_date, client, catalog, selected_streams, streams_to_sync, each_filter) max_child_bms.update({child: max(max_child_bms.get(child, ""), get_bookmark(state, child, "updated_at", start_date)) for child in self.children @@ -204,7 +213,7 @@ class ChildStream(Stream): def sync_obj(self, state, start_date, client, catalog, selected_streams, streams_to_sync, predefined_filter=None): full_url = self.build_url(client.base_url, self.parent_id) - min_bookmark = get_min_bookmark(self.tap_stream_id, streams_to_sync, start_date, state, self.replication_keys[0], predefined_filter) + min_bookmark = get_min_bookmark(self.tap_stream_id, streams_to_sync, start_date, state, self.replication_keys[0], None) max_bookmark = min_bookmark self.params['page'] = 1 self.paginate = True @@ -214,10 +223,9 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams data = client.request(full_url, self.params) self.paginate = len(data) >= PAGE_SIZE self.params['page'] += 1 - max_bookmark = self.write_records(catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, predefined_filter) - - write_bookmark(self.tap_stream_id, selected_streams, max_bookmark, state, predefined_filter) - return state + bookmark, _ = self.write_records(catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, None) + max_bookmark = max(max_bookmark, bookmark) + return max_bookmark class Conversations(ChildStream): tap_stream_id = 'conversations' From 445a3fd3a4f8cb3884ff8216906407cb8337097b Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Thu, 25 Aug 2022 10:25:00 +0000 Subject: [PATCH 11/32] added code comments and updated unittests --- setup.py | 5 + tap_freshdesk/streams.py | 105 +++++++++------ tap_freshdesk/sync.py | 2 +- tests/unittests/test_bookmark_handling.py | 71 ++++++++++ tests/unittests/test_check_access_token.py | 22 +++ tests/unittests/test_currently_syncing.py | 56 ++++++++ tests/unittests/test_main.py | 101 ++++++++++++++ tests/unittests/test_streams.py | 147 +++++++++++++++++++++ tests/unittests/test_sync.py | 138 +++++++++++++++++++ 9 files changed, 608 insertions(+), 39 deletions(-) create mode 100644 tests/unittests/test_bookmark_handling.py create mode 100644 tests/unittests/test_check_access_token.py create mode 100644 tests/unittests/test_currently_syncing.py create mode 100644 tests/unittests/test_main.py create mode 100644 tests/unittests/test_streams.py create mode 100644 tests/unittests/test_sync.py diff --git a/setup.py b/setup.py index 67d9ead..cbbc079 100644 --- a/setup.py +++ b/setup.py @@ -14,6 +14,11 @@ 'requests==2.20.0', 'backoff==1.8.0' ], + extras_require={ + 'dev': [ + 'pylint', + ] + }, entry_points=''' [console_scripts] tap-freshdesk=tap_freshdesk:main 
diff --git a/tap_freshdesk/streams.py b/tap_freshdesk/streams.py index 3560217..32ec2a4 100644 --- a/tap_freshdesk/streams.py +++ b/tap_freshdesk/streams.py @@ -28,13 +28,15 @@ def get_min_bookmark(stream, streams_to_sync, start_date, state, bookmark_key, p def get_schema(catalog, stream_id): """ - Return catalog of the specified stream. + Return the catalog of the specified stream. """ stream_catalog = [cat for cat in catalog if cat['tap_stream_id'] == stream_id ][0] return stream_catalog def write_bookmark(stream, selected_streams, bookmark_value, state, predefined_filter=None): - """If the stream is selected, write the bookmark""" + """ + Write the bookmark in case the stream is selected. + """ stream_obj = STREAMS[stream]() stream_id = stream_obj.tap_stream_id if stream in selected_streams: @@ -60,12 +62,16 @@ class Stream: id_key = None records_count = {} force_str = False - date_filter = False + date_filter = '' parent_id = None + filters = [] + filter_keyword = '' def transform_dict(self, d, key_key="name", value_key="value", force_str=False): - # Custom fields are expected to be strings, but sometimes the API sends - # booleans. We cast those to strings to match the schema. + """ + Custom fields are expected to be strings, but sometimes the API sends + booleans. We cast those to strings to match the schema. + """ rtn = [] for k, v in d.items(): if force_str: @@ -74,25 +80,24 @@ def transform_dict(self, d, key_key="name", value_key="value", force_str=False): return rtn def build_url(self, base_url, *args): + """ + Build the full url with parameters and attributes. + """ return base_url + '/api/v2/'+ self.path.format(*args) - def sync_child_stream(self, parent_id, catalog, state, selected_stream_ids, start_date, max_bookmark, client, streams_to_sync): - - for child in self.children: - child_obj = STREAMS[child]() - - if child in selected_stream_ids: - child_obj.parent_id = parent_id - child_obj.sync_obj(state, start_date, client, catalog, selected_stream_ids, streams_to_sync) - return max_bookmark - def write_records(self, catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, predefined_filter=None): + """ + Transform the chunk of records according to the schema and write the records based on the bookmark. 
+ """ stream_catalog = get_schema(catalog, self.tap_stream_id) stream_id = self.tap_stream_id + + # Append the predefined filter in case it's present if predefined_filter: - self.params['filter'] = predefined_filter + self.params[self.filter_keyword] = predefined_filter stream_id = stream_id + '_' + predefined_filter bookmark = get_bookmark(state, stream_id, self.replication_keys[0], start_date) + # The max bookmark so far for the child stream child_max_bookmark = None child_max_bookmarks = {} @@ -110,12 +115,13 @@ def write_records(self, catalog, state, selected_streams, start_date, data, max_ max_bookmark = max(max_bookmark, rec[self.replication_keys[0]]) counter.increment(1) - # Write selected child records + # Sync the child streams if they are selected for child in self.children: child_obj = STREAMS[child]() child_max_bookmark = get_bookmark(state, child_obj.tap_stream_id, child_obj.replication_keys[0], start_date) if child in selected_streams: child_obj.parent_id = row['id'] + # Update the child's max_bookmark as the max of the two child_max_bookmark = max(child_max_bookmark, child_obj.sync_obj(state, start_date, client, catalog, selected_streams, streams_to_sync)) child_max_bookmarks[child] = child_max_bookmark return max_bookmark, child_max_bookmarks @@ -124,16 +130,18 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams full_url = self.build_url(client.base_url, self.parent_id) if predefined_filter: LOGGER.info("Syncing tickets with filter {}".format(predefined_filter)) - self.params['filter'] = predefined_filter + self.params[self.filter_keyword] = predefined_filter min_bookmark = get_min_bookmark(self.tap_stream_id, streams_to_sync, start_date, state, self.replication_keys[0], predefined_filter) max_bookmark = min_bookmark + # Add the `updated_since` param if the date_filter attribute is True if self.date_filter: - self.params['updated_since'] = min_bookmark + self.params[self.date_filter] = min_bookmark self.params['page'] = 1 self.paginate = True LOGGER.info("Syncing {} from {}".format(self.tap_stream_id, min_bookmark)) + # Paginate through the request while self.paginate: data = client.request(full_url, self.params) self.paginate = len(data) >= PAGE_SIZE @@ -141,6 +149,7 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams max_bookmark, child_max_bookmarks = self.write_records(catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, predefined_filter) write_bookmark(self.tap_stream_id, selected_streams, max_bookmark, state, predefined_filter) + # Write the max_bookmark for the child streams in the state files if they are selected. 
for key, value in child_max_bookmarks.items(): write_bookmark(key, selected_streams, value, state, None) return state @@ -174,27 +183,14 @@ class Roles(Stream): replication_method = 'INCREMENTAL' path = 'roles' - -class Tickets(Stream): - tap_stream_id = 'tickets' - key_properties = ['id'] - replication_keys = ['updated_at'] - replication_method = 'INCREMENTAL' - path = 'tickets' - children = ['conversations', 'satisfaction_ratings', 'time_entries'] - id_key = 'id' - date_filter = True - params = { - "per_page": PAGE_SIZE, - 'order_by': replication_keys[0], - 'order_type': "asc", - 'include': "requester,company,stats" - } - +class DateFilteredStream(Stream): def sync_obj(self, state, start_date, client, catalog, selected_streams, streams_to_sync, predefined_filter=None): + """ + The overridden sync_obj() method to fetch the ticket records with different filters. + """ dup_state = copy.deepcopy(state) max_child_bms = {} - for each_filter in [None, 'deleted', 'spam']: + for each_filter in self.filters: # Update child bookmark to original_state for child in filter(lambda s: s in selected_streams, self.children): singer.write_bookmark(state, child, "updated_at", get_bookmark(dup_state, child, "updated_at", start_date)) @@ -208,10 +204,42 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams for child, bm in max_child_bms.items(): singer.write_bookmark(state, child, "updated_at", bm) return state +class Tickets(DateFilteredStream): + tap_stream_id = 'tickets' + key_properties = ['id'] + replication_keys = ['updated_at'] + replication_method = 'INCREMENTAL' + path = 'tickets' + children = ['conversations', 'satisfaction_ratings', 'time_entries'] + id_key = 'id' + date_filter = 'updated_since' + params = { + "per_page": PAGE_SIZE, + 'order_by': replication_keys[0], + 'order_type': "asc", + 'include': "requester,company,stats" + } + filter_keyword = 'filter' + filters = [None, 'deleted', 'spam'] + +class Contacts(DateFilteredStream): + tap_stream_id = 'contacts' + key_properties = ['id'] + replication_keys = ['updated_at'] + replication_method = 'INCREMENTAL' + path = 'contacts' + id_key = 'id' + date_filter = '_updated_since' + filter_keyword = 'state' + filters = [None, 'deleted', 'blocked'] + class ChildStream(Stream): def sync_obj(self, state, start_date, client, catalog, selected_streams, streams_to_sync, predefined_filter=None): + """ + The child stream sync_obj() method to sync the child records + """ full_url = self.build_url(client.base_url, self.parent_id) min_bookmark = get_min_bookmark(self.tap_stream_id, streams_to_sync, start_date, state, self.replication_keys[0], None) max_bookmark = min_bookmark @@ -219,6 +247,7 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams self.paginate = True LOGGER.info("Syncing {} from {}".format(self.tap_stream_id, min_bookmark)) + # Paginate through the records while self.paginate: data = client.request(full_url, self.params) self.paginate = len(data) >= PAGE_SIZE @@ -243,7 +272,6 @@ class SatisfactionRatings(ChildStream): replication_method = 'INCREMENTAL' path = 'tickets/{}/satisfaction_ratings' parent = 'tickets' - date_filter = True class TimeEntries(ChildStream): tap_stream_id = 'time_entries' @@ -257,6 +285,7 @@ class TimeEntries(ChildStream): STREAMS = { "agents": Agents, "companies": Companies, + "contacts": Contacts, "conversations": Conversations, "groups": Groups, "roles": Roles, diff --git a/tap_freshdesk/sync.py b/tap_freshdesk/sync.py index b16503d..310d667 100644 --- 
a/tap_freshdesk/sync.py +++ b/tap_freshdesk/sync.py @@ -85,7 +85,7 @@ def sync(client, config, state, catalog): write_schemas(stream, catalog, selected_streams) update_currently_syncing(state, stream) - state = stream_obj.sync_obj(state, config["start_date"], client, catalog['streams'], + stream_obj.sync_obj(state, config["start_date"], client, catalog['streams'], selected_streams, streams_to_sync) singer.write_state(state) diff --git a/tests/unittests/test_bookmark_handling.py b/tests/unittests/test_bookmark_handling.py new file mode 100644 index 0000000..a1bca25 --- /dev/null +++ b/tests/unittests/test_bookmark_handling.py @@ -0,0 +1,71 @@ +import unittest +from parameterized import parameterized +from tap_freshdesk.streams import get_min_bookmark, get_schema, write_bookmark + + +class TestGetMinBookmark(unittest.TestCase): + """ + Test `get_min_bookmark` method of the stream class + """ + + start_date = "2020-04-01T00:00:00Z" + state = { + "bookmarks": { + "tickets": {"updated_at": "2022-03-29T00:00:00Z"}, + "conversations": {"updated_at": "2022-03-01T00:00:00Z"}, + "satisfaction_ratings": {"updated_at": "2022-03-14T00:00:00Z"}, + "time_entries": {"updated_at": "2022-04-01T00:00:00Z"}, + } + } + + @parameterized.expand([ + ["with_child_selected", "tickets", ["tickets", "satisfaction_ratings"], "updated_at", "2022-03-14T00:00:00Z"], + ["only_children_selected", "tickets", ["satisfaction_ratings","conversations", "time_entries"], "updated_at", "2022-03-01T00:00:00Z"], + ["only_parent_selected", "tickets", ["tickets"], "updated_at", "2022-03-29T00:00:00Z"], + ]) + def test_min_bookmark(self, name, stream_name, stream_to_sync, bookmark_key, expected_bookmark): + """ + Test that `get_min_bookmark` method returns the minimum bookmark from the parent and its corresponding child bookmarks. + """ + min_bookmark = get_min_bookmark(stream_name, stream_to_sync, self.start_date, self.state, bookmark_key) + + # Verify returned bookmark is expected + self.assertEqual(min_bookmark, expected_bookmark) + + +class TestGetSchema(unittest.TestCase): + """ + Test `get_schema` method of the stream class. 
+ """ + + def test_get_schema(self): + """Verify function returns expected schema""" + catalog = [ + {"tap_stream_id": "roles"}, + {"tap_stream_id": "agents"}, + {"tap_stream_id": "time_entries"}, + ] + expected_schema = {"tap_stream_id": "agents"} + + # Verify returned schema is same as exected schema + self.assertEqual(get_schema(catalog, "agents"), expected_schema) + + +class TestWriteBookmark(unittest.TestCase): + """ + Test the `write_bookmark` method of the stream class + """ + + @parameterized.expand([ + ["stream_not_selected", "agents", False, {"bookmarks": {}}], + ["stream_not_selected", "groups", True, {"bookmarks": {"groups": {"updated_at": "BOOKMARK_VALUE"}}}], + ]) + def test_write_bookmark(self, name, stream, is_called, expected_state): + """ + Test that bookmark is written only if the stream is selected + """ + state = {"bookmarks": {}} + write_bookmark(stream, ["roles", "groups"], "BOOKMARK_VALUE", state) + + # Verify that the final state is as expected + self.assertEqual(state, expected_state) diff --git a/tests/unittests/test_check_access_token.py b/tests/unittests/test_check_access_token.py new file mode 100644 index 0000000..aadaa2d --- /dev/null +++ b/tests/unittests/test_check_access_token.py @@ -0,0 +1,22 @@ +import unittest +from unittest import mock +from tap_freshdesk import client + + +class TestAccessToken(unittest.TestCase): + """ + Test `check_access_token` method of client class + """ + + @mock.patch("tap_freshdesk.client.FreshdeskClient.request") + def test_access_token(self, mock_request): + """ + Test that to check access token a request call is made. + """ + config = {"domain": "sampleDomain"} + _client = client.FreshdeskClient(config) + _client.check_access_token() + + # Verify that for check access token, `request` method was called + self.assertTrue(mock_request.called) + mock_request.assert_called_with("https://sampleDomain.freshdesk.com/api/v2/roles", mock.ANY) diff --git a/tests/unittests/test_currently_syncing.py b/tests/unittests/test_currently_syncing.py new file mode 100644 index 0000000..c37cc52 --- /dev/null +++ b/tests/unittests/test_currently_syncing.py @@ -0,0 +1,56 @@ +import unittest +from unittest import mock +from tap_freshdesk.sync import (update_currently_syncing, + get_ordered_stream_list) + +class TestGetOrderedStreamList(unittest.TestCase): + """ + Test `get_ordered_stream_list` function to get ordered list od streams + """ + + streams_to_sync = ["agents", "companies", "tickets", "conversations", "groups", "satisfaction_ratings", "time_entries"] + + def test_currently_syncing_not_in_list(self): + """Test if currently syncing is not available in `streams_to_sync` list, function returns sorted streams_to_sync list.""" + expected_list = ["agents", "companies", "conversations", "groups", "satisfaction_ratings", "tickets", "time_entries"] + final_list = get_ordered_stream_list("roles", self.streams_to_sync) + + # Verify with expected ordered list of streams + self.assertEqual(final_list, expected_list) + + def test_for_interrupted_sync(self): + """Test when the sync was interrupted, the function returns ordered list of streams starting with 'currently_syncing' stream.""" + expected_list = ["groups", "satisfaction_ratings", "tickets", "time_entries", "agents", "companies", "conversations"] + final_list = get_ordered_stream_list("groups", self.streams_to_sync) + + # Verify with expected ordered list of streams + self.assertEqual(final_list, expected_list) + + def test_for_completed_sync(self): + """Test when sync was not interrupted, the 
function returns sorted streams_to_sync list.""" + expected_list = ["agents", "companies", "conversations", "groups", "satisfaction_ratings", "tickets", "time_entries"] + final_list = get_ordered_stream_list(None, self.streams_to_sync) + + # Verify with expected ordered list of streams + self.assertEqual(final_list, expected_list) + +class TestUpdateCurrentlySyncing(unittest.TestCase): + + """ + Test `update_currently_syncing` function of sync. + """ + def test_update_syncing_stream(self): + """Test for adding currently syncing stream in state.""" + state = {"currently_syncing": "groups"} + update_currently_syncing(state, "groups") + + # Verify with expected state + self.assertEqual(state, {"currently_syncing": "groups"}) + + def test_flush_currently_syncing(self): + """Test for removing currently syncing stream from state.""" + state = {"currently_syncing": "agents"} + update_currently_syncing(state, None) + + # Verify with expected state + self.assertEqual(state, {}) \ No newline at end of file diff --git a/tests/unittests/test_main.py b/tests/unittests/test_main.py new file mode 100644 index 0000000..d56cdef --- /dev/null +++ b/tests/unittests/test_main.py @@ -0,0 +1,101 @@ +import unittest +from unittest import mock +from singer.catalog import Catalog +from tap_freshdesk import main +from tap_freshdesk.discover import discover + +class MockArgs: + """Mock args object class""" + + def __init__(self, config = None, catalog = None, state = {}, discover = False) -> None: + self.config = config + self.catalog = catalog + self.state = state + self.discover = discover + +@mock.patch("tap_freshdesk.FreshdeskClient") +@mock.patch("singer.utils.parse_args") +class TestDiscoverMode(unittest.TestCase): + """ + Test main function for discover mode + """ + + mock_config = {"start_date": "", "access_token": ""} + + @mock.patch("tap_freshdesk._discover") + def test_discover_with_config(self, mock_discover, mock_args, mock_verify_access): + """Test `_discover` function is called for discover mode""" + mock_discover.return_value = Catalog([]) + mock_args.return_value = MockArgs(discover = True, config = self.mock_config) + main() + + # Verify that `discover` was called + self.assertTrue(mock_discover.called) + + +@mock.patch("tap_freshdesk.FreshdeskClient.check_access_token") +@mock.patch("singer.utils.parse_args") +@mock.patch("tap_freshdesk._sync") +class TestSyncMode(unittest.TestCase): + """ + Test main function for sync mode + """ + + mock_config = {"start_date": "", "access_token": ""} + mock_catalog = {"streams": [{"stream": "teams", "schema": {}, "metadata": {}}]} + + @mock.patch("tap_freshdesk._discover") + def test_sync_with_catalog(self, mock_discover, mock_sync, mock_args, mock_check_access_token): + """Test sync mode with catalog given in args""" + + mock_args.return_value = MockArgs(config=self.mock_config, catalog=Catalog.from_dict(self.mock_catalog)) + main() + + # Verify `_sync` is called with expected arguments + mock_sync.assert_called_with(mock.ANY, self.mock_config, {}, self.mock_catalog) + + # verify `_discover` function is not called + self.assertFalse(mock_discover.called) + + @mock.patch("tap_freshdesk._discover") + def test_sync_without_catalog(self, mock_discover, mock_sync, mock_args, mock_check_access_token): + """Test sync mode without catalog given in args""" + + mock_discover.return_value = catalog=Catalog.from_dict(self.mock_catalog) + mock_args.return_value = MockArgs(config=self.mock_config) + main() + + # Verify `_sync` is called with expected arguments + 
mock_sync.assert_called_with(mock.ANY, self.mock_config, {}, self.mock_catalog) + + # verify `_discover` function is called + self.assertTrue(mock_discover.called) + + def test_sync_with_state(self, mock_sync, mock_args, mock_check_access_token): + """Test sync mode with state given in args""" + mock_state = {"bookmarks": {"projec ts": ""}} + mock_args.return_value = MockArgs(config=self.mock_config, catalog=Catalog.from_dict(self.mock_catalog), state=mock_state) + main() + + # Verify `_sync` is called with expected arguments + mock_sync.assert_called_with(mock.ANY, self.mock_config, mock_state, self.mock_catalog) + +class TestDiscover(unittest.TestCase): + """Test `discover` function.""" + + def test_discover(self): + return_catalog = discover() + + # Verify discover function returns `Catalog` type object. + self.assertIsInstance(return_catalog, Catalog) + + @mock.patch("tap_freshdesk.discover.Schema") + @mock.patch("tap_freshdesk.discover.LOGGER.error") + def test_discover_error_handling(self, mock_logger, mock_schema): + """Test discover function if exception arises.""" + mock_schema.from_dict.side_effect = Exception + with self.assertRaises(Exception): + discover() + + # Verify logger called 3 times when an exception arises. + self.assertEqual(mock_logger.call_count, 3) diff --git a/tests/unittests/test_streams.py b/tests/unittests/test_streams.py new file mode 100644 index 0000000..789bd6c --- /dev/null +++ b/tests/unittests/test_streams.py @@ -0,0 +1,147 @@ +import unittest +from unittest import mock +from parameterized import parameterized +from tap_freshdesk.streams import Agents, Tickets + + +class TestSyncObj(unittest.TestCase): + """ + Test `sync_obj` mehtod of stream. + """ + + start_date = "2019-06-01T00:00:00Z" + only_parent_response = [ + [{"id": i, "updated_at": f"2020-0{i}-01T00:00:00Z"} for i in [1,5,2]], # Tickets Response + [{"id": "33", "updated_at": f"2020-03-01T00:00:00Z"}], # Deleted tickets Response + [{"id": "55", "updated_at": f"2020-04-01T00:00:00Z"}], # Spam tickets Response + ] + written_states_1 = { + "tickets": "2020-05-01T00:00:00Z", + "tickets_deleted": "2020-03-01T00:00:00Z", + "tickets_spam": "2020-04-01T00:00:00Z", + } + with_child_response = [ + [{"id": i, "updated_at": f"2020-0{i}-01T00:00:00Z"} for i in [1,5,2]], # Tickets Response + [{"id": i, "updated_at": f"2020-0{i}-01T00:00:00Z"} for i in [2,4]], # conversations Response + [{"id": "33", "updated_at": "2020-03-01T00:00:00Z"}], # conversations Response + [{"id": "55", "updated_at": "2020-04-01T00:00:00Z"}], # conversations Response + [],[] # Deleted/Spam tickets response + ] + written_states_2 = { + "conversations": "2020-04-01T00:00:00Z", + } + written_states_3 = { + "tickets": "2020-05-01T00:00:00Z", + "conversations": "2020-04-01T00:00:00Z", + } + expected_state_1 = { + "conversations": {"updated_at": "2020-04-01T00:00:00Z"}, + "tickets": {"updated_at": "2020-03-15T00:00:00Z"}, + "tickets_deleted": {"updated_at": "2020-05-01T00:00:00Z"}, + "tickets_spam": {"updated_at": "2020-04-01T00:00:00Z"} + } + expected_state_2 = {'conversations': {'updated_at': '2020-04-01T00:00:00Z'}, + 'tickets': {'updated_at': '2019-06-01T00:00:00Z'}, + 'tickets_deleted': {'updated_at': '2020-05-01T00:00:00Z'}, + 'tickets_spam': {'updated_at': '2020-04-01T00:00:00Z'}} + expected_state_3 = { + **expected_state_1, + "tickets": {"updated_at": "2020-03-16T00:00:00Z"}, + } + + @parameterized.expand([ + ["parent_selected", ["tickets"], ["tickets"], only_parent_response, 5, written_states_1], + ["child_selected", 
["conversations"], ["tickets", "conversations"], with_child_response, 4, written_states_2], + ["parent_child_both_selected", ["tickets", "conversations"], ["tickets", "conversations"], with_child_response, 7, written_states_3], + ]) + @mock.patch("singer.write_record") + @mock.patch("singer.write_bookmark") + def test_stream(self, name, selected_streams, streams_to_sync, responses, written_records, written_states, mock_write_bookmark, mock_write_record): + """ + Test that stream is writing records and bookmarks only if selected. + """ + stream = Tickets() + state = {} + client = mock.Mock() + client.base_url = "" + client.request.side_effect = responses + catalog = [ + {"schema":{}, "tap_stream_id": "tickets", "metadata": []}, + {"schema":{}, "tap_stream_id": "conversations", "metadata": []} + ] + + stream.sync_obj(state, self.start_date, client, catalog, selected_streams, streams_to_sync) + + # Verify expected records are written + self.assertEqual(mock_write_record.call_count, written_records) + + # Verify max bookmark is updated for all selected streams + for stream, bookmark in written_states.items(): + mock_write_bookmark.assert_any_call({}, stream, "updated_at", bookmark) + + + @parameterized.expand([ + ["without_state", dict(), expected_state_1, 13], + ["with_parent_state", {"bookmarks": {"tickets": {"updated_at": "2020-03-16T00:00:00Z"}}}, expected_state_2, 10], + ["with_child_state", {"bookmarks": {"conversations": {"updated_at": "2020-03-23T00:00:00Z"}}}, expected_state_1, 8], + ["with_both_state", {"bookmarks": {"tickets": {"updated_at": "2020-03-16T00:00:00Z"}, "conversations": {"updated_at": "2020-03-23T00:00:00Z"}}}, expected_state_3, 5], + ]) + @mock.patch("singer.write_record") + def test_parent_child_both_selected(self, name, state, expected_state, written_records, mock_write_record): + """ + Test parent and child streams both selected in given conditions: + - Without state + - With only parent bookmark + - With only child bookmark + - With both parent and child bookmark + """ + stream = Tickets() + client = mock.Mock() + client.base_url = "" + client.request.side_effect = [ + [{"id": i, "updated_at": f"2020-03-{i}T00:00:00Z"} for i in [11,15,12]], # Tickets Response + [{"id": 10+i, "updated_at": f"2020-03-{i}T00:00:00Z"} for i in [13,24]], # conversations Response + [{"id": 13, "updated_at": "2020-03-01T00:00:00Z"}], # conversations Response + [{"id": 95, "updated_at": "2020-04-01T00:00:00Z"}], # conversations Response + [{"id": 73, "updated_at": "2020-05-01T00:00:00Z"}], # Deleted tickets response + [{"id": 30+i, "updated_at": f"2020-03-{i}T00:00:00Z"}for i in [22,10]], # conversations response + [{"id": 43, "updated_at": "2020-04-01T00:00:00Z"}], # Spam tickets response + [{"id": 50+i, "updated_at": f"2020-03-{i}T00:00:00Z"}for i in [12,25]], # conversations response + ] + catalog = [ + {"schema":{}, "tap_stream_id": "tickets", "metadata": []}, + {"schema":{}, "tap_stream_id": "conversations", "metadata": []} + ] + + stream.sync_obj(state, self.start_date, client, catalog, ["tickets", "conversations"], ["tickets", "conversations"]) + self.assertEqual(mock_write_record.call_count, written_records) + self.assertDictEqual(state, {"bookmarks": expected_state}) + + +class TestSyncTransformDict(unittest.TestCase): + """ + Test `transform_dict` method of stream class. 
+ """ + + stream = Agents() + expected_list_1 = [{"name": "Agency", "value": "Justice League"}, + {"name": "Department", "value": "Superhero"}] + expected_list_2 = [{"key": "Agency", "data": "Justice League"}, + {"key": "Department", "data": "Superhero"}] + expected_list_3 = [{"name": "Agency", "value": "justice league"}, + {"name": "Department", "value": "superhero"}] + @parameterized.expand([ + ["coverting_dict_to_list", {"Agency": "Justice League", "Department": "Superhero"}, expected_list_1, {}], + ["With_custom_keys", {"Agency": "Justice League", "Department": "Superhero"}, expected_list_2, {"key_key":"key", "value_key":"data"}], + ["With_string_value", {"Agency": "Justice League", "Department": "Superhero"}, expected_list_3, {"force_str": True}], + ]) + def test_transform(self, name, dictionary, expected_list, kwargs): + """ + Test that the dictionary is transformed as per given conditions: + - Value is a lowercase string when force_str: True + - Key-Values can be customized by passing in args + """ + returned_list = self.stream.transform_dict(dictionary, **kwargs) + + # Verify returned list is expected + self.assertEqual(returned_list, expected_list) diff --git a/tests/unittests/test_sync.py b/tests/unittests/test_sync.py new file mode 100644 index 0000000..5d2bb38 --- /dev/null +++ b/tests/unittests/test_sync.py @@ -0,0 +1,138 @@ +import unittest +from unittest import mock +from parameterized import parameterized +from tap_freshdesk.sync import (write_schemas, get_selected_streams, + get_stream_to_sync, sync) + + +def get_stream_catalog(stream_name, is_selected = False): + """Return catalog for stream""" + return { + "schema":{}, + "tap_stream_id": stream_name, + "metadata": [ + { + "breadcrumb": [], + "metadata":{"selected": is_selected} + } + ], + "key_properties": [] + } + + +def get_catalog(parent=False, child=False): + """Return complete catalog""" + + return { + "streams": [ + get_stream_catalog("agents"), + get_stream_catalog("companies", parent), + get_stream_catalog("conversations", child), + get_stream_catalog("tickets", parent), + get_stream_catalog("time_entries", child), + get_stream_catalog("groups", parent), + ] + } + + + +class TestSyncFunctions(unittest.TestCase): + """ + Test `sync` function. + """ + + # NOTE: For `tickets` stream `sync_obj` is called 3 times + @parameterized.expand([ + ["only_parent_selected", get_catalog(parent=True), ["companies", "tickets", "groups"], 5], + ["only_child_selected", get_catalog(child=True), ["conversations", "time_entries"], 3], + ["both_selected", get_catalog(parent=True, child=True), ["companies", "tickets", "groups", "conversations", "time_entries"], 5], + ["No_streams_selected", get_catalog(), [], 0], + ]) + @mock.patch("singer.write_state") + @mock.patch("singer.write_schema") + @mock.patch("tap_freshdesk.streams.Stream.sync_obj") + def test_sync(self, name, mock_catalog, selected_streams, synced_streams, mock_sync_endpoint, mock_write_schemas, mock_write_state): + """ + Test sync function. + """ + client = mock.Mock() + sync(client, {'start_date': ""}, {}, mock_catalog) + + # Verify write schema is called for selected streams + self.assertEqual(mock_write_schemas.call_count, len(selected_streams)) + for stream in selected_streams: + mock_write_schemas.assert_any_call(stream, mock.ANY, mock.ANY) + + # Verify sync object was called for syncing parent streams + self.assertEqual(mock_sync_endpoint.call_count, synced_streams) + + +class TestWriteSchemas(unittest.TestCase): + """ + Test `write_schemas` function. 
+ """ + + mock_catalog = {"streams": [ + get_stream_catalog("tickets"), + get_stream_catalog("time_entries"), + get_stream_catalog("conversations") + ]} + + @parameterized.expand([ + ["parents_selected", ["tickets"]], + ["child_selected", ["time_entries"]], + ["parent_and_child_selected", ["tickets", "conversations"]], + ]) + @mock.patch("singer.write_schema") + def test_write_schema(self, name, selected_streams, mock_write_schema): + """ + Test that only schema is written for only selected streams. + """ + write_schemas("tickets", self.mock_catalog, selected_streams) + for stream in selected_streams: + # Verify write_schema function is called. + mock_write_schema.assert_any_call(stream, mock.ANY, mock.ANY) + + +class TestGetStreamsToSync(unittest.TestCase): + """ + Testcase for `get_stream_to_sync` in sync. + """ + + @parameterized.expand([ + ['test_parent_selected', ["tickets"], ["tickets"]], + ['test_child_selected', ["conversations", "satisfaction_ratings"], ["conversations", "satisfaction_ratings", "tickets"]], + ['test_both_selected', ["conversations", "roles", "tickets"], ["conversations", "roles", "tickets"]] + ]) + def test_sync_streams(self, name, selected_streams, expected_streams): + """ + Test that if an only child is selected in the catalog, + then `get_stream_to_sync` returns the parent stream also. + """ + sync_streams = get_stream_to_sync(selected_streams) + + # Verify that the expected list of streams is returned + self.assertEqual(sync_streams, expected_streams) + + +class TestGetSelectedStreams(unittest.TestCase): + """ + Testcase for `get_selected_streams` in sync. + """ + + def test_streams_selection(self): + """ + Test that `get_selected_streams` returns the list of selected streams. + """ + catalog = {"streams": [ + get_stream_catalog("tickets", is_selected=True), + get_stream_catalog("roles", is_selected=True), + get_stream_catalog("contacts"), + get_stream_catalog("groups", is_selected=True), + get_stream_catalog("agents"), + ]} + expected_streams = ["tickets", "roles", "groups"] + selected_streams = get_selected_streams(catalog) + + # Verify expected list is returned + self.assertEqual(selected_streams, expected_streams) From c316fdfa69d3f5582e5b33fa2cdf46a5ff69d953 Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Thu, 25 Aug 2022 10:28:26 +0000 Subject: [PATCH 12/32] updated streams.py file --- tap_freshdesk/streams.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tap_freshdesk/streams.py b/tap_freshdesk/streams.py index 32ec2a4..6efec8e 100644 --- a/tap_freshdesk/streams.py +++ b/tap_freshdesk/streams.py @@ -195,7 +195,7 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams for child in filter(lambda s: s in selected_streams, self.children): singer.write_bookmark(state, child, "updated_at", get_bookmark(dup_state, child, "updated_at", start_date)) - state = super().sync_obj(state, start_date, client, catalog, selected_streams, streams_to_sync, each_filter) + super().sync_obj(state, start_date, client, catalog, selected_streams, streams_to_sync, each_filter) max_child_bms.update({child: max(max_child_bms.get(child, ""), get_bookmark(state, child, "updated_at", start_date)) for child in self.children @@ -204,6 +204,7 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams for child, bm in max_child_bms.items(): singer.write_bookmark(state, child, "updated_at", bm) return state + class Tickets(DateFilteredStream): tap_stream_id = 'tickets' key_properties = ['id'] From 
968ebdcf42eacb67ecff47af6b929509e69606c4 Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Thu, 25 Aug 2022 10:31:47 +0000 Subject: [PATCH 13/32] removed unused code --- tap_freshdesk/sync.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tap_freshdesk/sync.py b/tap_freshdesk/sync.py index 310d667..06b5a16 100644 --- a/tap_freshdesk/sync.py +++ b/tap_freshdesk/sync.py @@ -73,9 +73,6 @@ def sync(client, config, state, catalog): LOGGER.info("Selected Streams: %s", selected_streams) LOGGER.info("Syncing Streams: %s", streams_to_sync) - # Initializing a dictionary to keep track of record count by streams - records_count = {stream:0 for stream in STREAMS.keys()} - singer.write_state(state) currently_syncing = singer.get_currently_syncing(state) streams_to_sync = get_ordered_stream_list(currently_syncing, streams_to_sync) @@ -88,6 +85,3 @@ def sync(client, config, state, catalog): stream_obj.sync_obj(state, config["start_date"], client, catalog['streams'], selected_streams, streams_to_sync) singer.write_state(state) - - # for stream_name, stream_count in records_count.items(): - # LOGGER.info('%s: %d', stream_name, stream_count) From c11a8f4094a0b763dbaeaf7fa5c7d5a6e52b7ac1 Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Thu, 25 Aug 2022 11:44:38 +0000 Subject: [PATCH 14/32] make pylint happy --- tap_freshdesk/streams.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tap_freshdesk/streams.py b/tap_freshdesk/streams.py index 6efec8e..7eef2a8 100644 --- a/tap_freshdesk/streams.py +++ b/tap_freshdesk/streams.py @@ -50,11 +50,12 @@ class Stream: """ tap_stream_id = None replication_method = None - replication_keys = None + replication_keys = [] key_properties = [] endpoint = None filter_param = False children = [] + path = '' headers = {} params = {"per_page": PAGE_SIZE, "page": 1} paginate = True @@ -101,7 +102,7 @@ def write_records(self, catalog, state, selected_streams, start_date, data, max_ child_max_bookmark = None child_max_bookmarks = {} - with singer.metrics.record_counter(self.tap_stream_id) as counter: + with singer.metrics.record_counter(self.tap_stream_id) as counter: with singer.Transformer() as transformer: extraction_time = singer.utils.now() stream_metadata = singer.metadata.to_map(stream_catalog['metadata']) @@ -129,7 +130,7 @@ def write_records(self, catalog, state, selected_streams, start_date, data, max_ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams_to_sync, predefined_filter=None): full_url = self.build_url(client.base_url, self.parent_id) if predefined_filter: - LOGGER.info("Syncing tickets with filter {}".format(predefined_filter)) + LOGGER.info("Syncing tickets with filter %s", predefined_filter) self.params[self.filter_keyword] = predefined_filter min_bookmark = get_min_bookmark(self.tap_stream_id, streams_to_sync, start_date, state, self.replication_keys[0], predefined_filter) max_bookmark = min_bookmark @@ -140,7 +141,7 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams self.params['page'] = 1 self.paginate = True - LOGGER.info("Syncing {} from {}".format(self.tap_stream_id, min_bookmark)) + LOGGER.info("Syncing %s from %s", self.tap_stream_id, min_bookmark) # Paginate through the request while self.paginate: data = client.request(full_url, self.params) @@ -198,9 +199,9 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams super().sync_obj(state, start_date, client, catalog, selected_streams, streams_to_sync, each_filter) 
max_child_bms.update({child: max(max_child_bms.get(child, ""), get_bookmark(state, child, "updated_at", start_date)) - for child in self.children + for child in self.children if child in selected_streams}) - + for child, bm in max_child_bms.items(): singer.write_bookmark(state, child, "updated_at", bm) return state @@ -247,7 +248,7 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams self.params['page'] = 1 self.paginate = True - LOGGER.info("Syncing {} from {}".format(self.tap_stream_id, min_bookmark)) + LOGGER.info("Syncing %s from %s", self.tap_stream_id, min_bookmark) # Paginate through the records while self.paginate: data = client.request(full_url, self.params) From 69fa2a5ebed6647c06a1b497a64ae1085e102e5a Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Thu, 25 Aug 2022 11:47:42 +0000 Subject: [PATCH 15/32] make pylint happy --- tap_freshdesk/client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tap_freshdesk/client.py b/tap_freshdesk/client.py index 03e7208..4570682 100644 --- a/tap_freshdesk/client.py +++ b/tap_freshdesk/client.py @@ -39,7 +39,7 @@ def check_access_token(self): giveup=lambda e: e.response is not None and 400 <= e.response.status_code < 500, factor=2) @utils.ratelimit(1, 2) - def request(self, url, params={}): + def request(self, url, params=None): """ Call rest API and return the response in case of status code 200. """ @@ -48,13 +48,13 @@ def request(self, url, params={}): headers['User-Agent'] = self.config['user_agent'] req = requests.Request('GET', url, params=params, auth=(self.config['api_key'], ""), headers=headers).prepare() - LOGGER.info("GET {}".format(req.url)) + LOGGER.info("GET %s", req.url) response = self.session.send(req) # Call the function again if the rate limit is exceeded if 'Retry-After' in response.headers: retry_after = int(response.headers['Retry-After']) - LOGGER.info("Rate limit reached. Sleeping for {} seconds".format(retry_after)) + LOGGER.info("Rate limit reached. 
Sleeping for %s seconds", retry_after) time.sleep(retry_after) return self.request(url, params) From 1837bf32869c3e5275f2ebda3ef7bdb27c880d60 Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Thu, 25 Aug 2022 11:52:36 +0000 Subject: [PATCH 16/32] made pylint happy --- tap_freshdesk/schema.py | 2 +- tap_freshdesk/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tap_freshdesk/schema.py b/tap_freshdesk/schema.py index 257150b..9b1baa8 100644 --- a/tap_freshdesk/schema.py +++ b/tap_freshdesk/schema.py @@ -21,7 +21,7 @@ def get_schemas(): for stream_name, stream_metadata in STREAMS.items(): schema_path = get_abs_path('schemas/{}.json'.format(stream_name)) - with open(schema_path) as file: + with open(schema_path) as file: # pylint: disable=unspecified-encoding schema = json.load(file) schemas[stream_name] = schema diff --git a/tap_freshdesk/utils.py b/tap_freshdesk/utils.py index ffa3b6f..79d61fd 100644 --- a/tap_freshdesk/utils.py +++ b/tap_freshdesk/utils.py @@ -48,7 +48,7 @@ def get_abs_path(path): def load_json(path): - with open(path) as f: + with open(path) as f: # pylint: disable=unspecified-encoding return json.load(f) From 2adbed75020066acd9c6a8324d5d4b0aa155a95b Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Thu, 25 Aug 2022 13:03:20 +0000 Subject: [PATCH 17/32] resolved issue of params getting used for other streams --- tap_freshdesk/streams.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/tap_freshdesk/streams.py b/tap_freshdesk/streams.py index 7eef2a8..810c561 100644 --- a/tap_freshdesk/streams.py +++ b/tap_freshdesk/streams.py @@ -90,12 +90,13 @@ def write_records(self, catalog, state, selected_streams, start_date, data, max_ """ Transform the chunk of records according to the schema and write the records based on the bookmark. """ + params = copy.deepcopy(self.params) stream_catalog = get_schema(catalog, self.tap_stream_id) stream_id = self.tap_stream_id # Append the predefined filter in case it's present if predefined_filter: - self.params[self.filter_keyword] = predefined_filter + params[self.filter_keyword] = predefined_filter stream_id = stream_id + '_' + predefined_filter bookmark = get_bookmark(state, stream_id, self.replication_keys[0], start_date) # The max bookmark so far for the child stream @@ -128,25 +129,29 @@ def write_records(self, catalog, state, selected_streams, start_date, data, max_ return max_bookmark, child_max_bookmarks def sync_obj(self, state, start_date, client, catalog, selected_streams, streams_to_sync, predefined_filter=None): + """ + The base stream class sync_obj() function to fetch records. 
+ """ + params = copy.deepcopy(self.params) full_url = self.build_url(client.base_url, self.parent_id) if predefined_filter: - LOGGER.info("Syncing tickets with filter %s", predefined_filter) - self.params[self.filter_keyword] = predefined_filter + LOGGER.info("Syncing %s with filter %s", self.tap_stream_id, predefined_filter) + params[self.filter_keyword] = predefined_filter min_bookmark = get_min_bookmark(self.tap_stream_id, streams_to_sync, start_date, state, self.replication_keys[0], predefined_filter) max_bookmark = min_bookmark # Add the `updated_since` param if the date_filter attribute is True if self.date_filter: - self.params[self.date_filter] = min_bookmark - self.params['page'] = 1 + params[self.date_filter] = min_bookmark + params['page'] = 1 self.paginate = True LOGGER.info("Syncing %s from %s", self.tap_stream_id, min_bookmark) # Paginate through the request while self.paginate: - data = client.request(full_url, self.params) + data = client.request(full_url, params) self.paginate = len(data) >= PAGE_SIZE - self.params['page'] += 1 + params['page'] += 1 max_bookmark, child_max_bookmarks = self.write_records(catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, predefined_filter) write_bookmark(self.tap_stream_id, selected_streams, max_bookmark, state, predefined_filter) @@ -242,18 +247,19 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams """ The child stream sync_obj() method to sync the child records """ + params = copy.deepcopy(self.params) full_url = self.build_url(client.base_url, self.parent_id) min_bookmark = get_min_bookmark(self.tap_stream_id, streams_to_sync, start_date, state, self.replication_keys[0], None) max_bookmark = min_bookmark - self.params['page'] = 1 + params['page'] = 1 self.paginate = True LOGGER.info("Syncing %s from %s", self.tap_stream_id, min_bookmark) # Paginate through the records while self.paginate: - data = client.request(full_url, self.params) + data = client.request(full_url, params) self.paginate = len(data) >= PAGE_SIZE - self.params['page'] += 1 + params['page'] += 1 bookmark, _ = self.write_records(catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, None) max_bookmark = max(max_bookmark, bookmark) return max_bookmark From 4b8394bdc45fdb239745b596b1a8be5c513f7394 Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Thu, 25 Aug 2022 19:16:23 +0000 Subject: [PATCH 18/32] fixed the issue for bookmarking of child streams and added more code comments --- tap_freshdesk/streams.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/tap_freshdesk/streams.py b/tap_freshdesk/streams.py index 810c561..2a64e40 100644 --- a/tap_freshdesk/streams.py +++ b/tap_freshdesk/streams.py @@ -19,6 +19,7 @@ def get_min_bookmark(stream, streams_to_sync, start_date, state, bookmark_key, p if stream in streams_to_sync: if predefined_filter: stream = stream + '_' + predefined_filter + # LOGGER.info(f'======= {stream} {get_bookmark(state, stream, bookmark_key, start_date)}') min_bookmark = min(min_bookmark, get_bookmark(state, stream, bookmark_key, start_date)) for child in filter(lambda x: x in streams_to_sync, stream_obj.children): @@ -86,7 +87,7 @@ def build_url(self, base_url, *args): """ return base_url + '/api/v2/'+ self.path.format(*args) - def write_records(self, catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, predefined_filter=None): + def write_records(self, catalog, 
state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, child_max_bookmarks, predefined_filter=None): """ Transform the chunk of records according to the schema and write the records based on the bookmark. """ @@ -101,7 +102,7 @@ def write_records(self, catalog, state, selected_streams, start_date, data, max_ bookmark = get_bookmark(state, stream_id, self.replication_keys[0], start_date) # The max bookmark so far for the child stream child_max_bookmark = None - child_max_bookmarks = {} + # child_max_bookmarks = {} with singer.metrics.record_counter(self.tap_stream_id) as counter: with singer.Transformer() as transformer: @@ -109,6 +110,8 @@ def write_records(self, catalog, state, selected_streams, start_date, data, max_ stream_metadata = singer.metadata.to_map(stream_catalog['metadata']) for row in data: if self.tap_stream_id in selected_streams and row[self.replication_keys[0]] >= bookmark: + # Custom fields are expected to be strings, but sometimes the API sends + # booleans. We cast those to strings to match the schema. if 'custom_fields' in row: row['custom_fields'] = self.transform_dict(row['custom_fields'], force_str=self.force_str) @@ -123,8 +126,8 @@ def write_records(self, catalog, state, selected_streams, start_date, data, max_ child_max_bookmark = get_bookmark(state, child_obj.tap_stream_id, child_obj.replication_keys[0], start_date) if child in selected_streams: child_obj.parent_id = row['id'] - # Update the child's max_bookmark as the max of the two - child_max_bookmark = max(child_max_bookmark, child_obj.sync_obj(state, start_date, client, catalog, selected_streams, streams_to_sync)) + # Update the child's max_bookmark as the max of the already present max value and the return value + child_max_bookmark = max(child_max_bookmarks[child], child_obj.sync_obj(state, start_date, client, catalog, selected_streams, streams_to_sync)) child_max_bookmarks[child] = child_max_bookmark return max_bookmark, child_max_bookmarks @@ -134,11 +137,19 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams """ params = copy.deepcopy(self.params) full_url = self.build_url(client.base_url, self.parent_id) + + # Update the filter keyword in the params for date-filtered streams if predefined_filter: LOGGER.info("Syncing %s with filter %s", self.tap_stream_id, predefined_filter) params[self.filter_keyword] = predefined_filter + + # Get the minimum bookmark from the parent and the child streams min_bookmark = get_min_bookmark(self.tap_stream_id, streams_to_sync, start_date, state, self.replication_keys[0], predefined_filter) max_bookmark = min_bookmark + # Set the child_max_bookmarks dictionary to the minimum bookmark + child_max_bookmarks = {} + for child in self.children: + child_max_bookmarks[child] = min_bookmark # Add the `updated_since` param if the date_filter attribute is True if self.date_filter: @@ -152,7 +163,7 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams data = client.request(full_url, params) self.paginate = len(data) >= PAGE_SIZE params['page'] += 1 - max_bookmark, child_max_bookmarks = self.write_records(catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, predefined_filter) + max_bookmark, child_max_bookmarks = self.write_records(catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, child_max_bookmarks, predefined_filter) write_bookmark(self.tap_stream_id, selected_streams, max_bookmark, state, predefined_filter) # Write 
the max_bookmark for the child streams in the state files if they are selected. @@ -203,13 +214,14 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams super().sync_obj(state, start_date, client, catalog, selected_streams, streams_to_sync, each_filter) + # Update the max child bookmarks dictionary with the maximum from the child and the existing bookmark max_child_bms.update({child: max(max_child_bms.get(child, ""), get_bookmark(state, child, "updated_at", start_date)) for child in self.children if child in selected_streams}) + # Write the child stream bookmarks with the max value found for child, bm in max_child_bms.items(): singer.write_bookmark(state, child, "updated_at", bm) - return state class Tickets(DateFilteredStream): tap_stream_id = 'tickets' @@ -248,7 +260,9 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams The child stream sync_obj() method to sync the child records """ params = copy.deepcopy(self.params) + # Build the url for the request full_url = self.build_url(client.base_url, self.parent_id) + # Get the min bookmark from the parent and the child streams min_bookmark = get_min_bookmark(self.tap_stream_id, streams_to_sync, start_date, state, self.replication_keys[0], None) max_bookmark = min_bookmark params['page'] = 1 From 7085c8463d81fa98182471731340d9ad77d116b5 Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Fri, 26 Aug 2022 05:16:49 +0000 Subject: [PATCH 19/32] removed commented code and optimized --- tap_freshdesk/streams.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tap_freshdesk/streams.py b/tap_freshdesk/streams.py index 2a64e40..4fbc908 100644 --- a/tap_freshdesk/streams.py +++ b/tap_freshdesk/streams.py @@ -19,7 +19,6 @@ def get_min_bookmark(stream, streams_to_sync, start_date, state, bookmark_key, p if stream in streams_to_sync: if predefined_filter: stream = stream + '_' + predefined_filter - # LOGGER.info(f'======= {stream} {get_bookmark(state, stream, bookmark_key, start_date)}') min_bookmark = min(min_bookmark, get_bookmark(state, stream, bookmark_key, start_date)) for child in filter(lambda x: x in streams_to_sync, stream_obj.children): @@ -123,11 +122,11 @@ def write_records(self, catalog, state, selected_streams, start_date, data, max_ # Sync the child streams if they are selected for child in self.children: child_obj = STREAMS[child]() - child_max_bookmark = get_bookmark(state, child_obj.tap_stream_id, child_obj.replication_keys[0], start_date) if child in selected_streams: child_obj.parent_id = row['id'] + child_max_bookmark = get_bookmark(state, child_obj.tap_stream_id, child_obj.replication_keys[0], start_date) # Update the child's max_bookmark as the max of the already present max value and the return value - child_max_bookmark = max(child_max_bookmarks[child], child_obj.sync_obj(state, start_date, client, catalog, selected_streams, streams_to_sync)) + child_max_bookmark = max(child_max_bookmarks.get(child, child_max_bookmark), child_obj.sync_obj(state, start_date, client, catalog, selected_streams, streams_to_sync)) child_max_bookmarks[child] = child_max_bookmark return max_bookmark, child_max_bookmarks @@ -146,10 +145,8 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams # Get the minimum bookmark from the parent and the child streams min_bookmark = get_min_bookmark(self.tap_stream_id, streams_to_sync, start_date, state, self.replication_keys[0], predefined_filter) max_bookmark = min_bookmark - # Set the 
child_max_bookmarks dictionary to the minimum bookmark + # Initialize the child_max_bookmarks dictionary child_max_bookmarks = {} - for child in self.children: - child_max_bookmarks[child] = min_bookmark # Add the `updated_since` param if the date_filter attribute is True if self.date_filter: @@ -203,7 +200,7 @@ class Roles(Stream): class DateFilteredStream(Stream): def sync_obj(self, state, start_date, client, catalog, selected_streams, streams_to_sync, predefined_filter=None): """ - The overridden sync_obj() method to fetch the ticket records with different filters. + The overridden sync_obj() method to fetch the records with different filters. """ dup_state = copy.deepcopy(state) max_child_bms = {} @@ -274,6 +271,7 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams data = client.request(full_url, params) self.paginate = len(data) >= PAGE_SIZE params['page'] += 1 + # Write the records based on the bookmark and return the max_bookmark for the page bookmark, _ = self.write_records(catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, None) max_bookmark = max(max_bookmark, bookmark) return max_bookmark From 38df735162119b0468422cba22178a8187b46946 Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Fri, 26 Aug 2022 06:03:30 +0000 Subject: [PATCH 20/32] make pylint happy --- tap_freshdesk/streams.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_freshdesk/streams.py b/tap_freshdesk/streams.py index 4fbc908..4bd16cf 100644 --- a/tap_freshdesk/streams.py +++ b/tap_freshdesk/streams.py @@ -109,7 +109,7 @@ def write_records(self, catalog, state, selected_streams, start_date, data, max_ stream_metadata = singer.metadata.to_map(stream_catalog['metadata']) for row in data: if self.tap_stream_id in selected_streams and row[self.replication_keys[0]] >= bookmark: - # Custom fields are expected to be strings, but sometimes the API sends + # Custom fields are expected to be strings, but sometimes the API sends # booleans. We cast those to strings to match the schema. 
if 'custom_fields' in row: row['custom_fields'] = self.transform_dict(row['custom_fields'], force_str=self.force_str) From c294a79f4fa8a8fe3de9cbfa0cf38a7d81f35e55 Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Fri, 26 Aug 2022 09:22:09 +0000 Subject: [PATCH 21/32] added currently syncing as None at the end of the sync and added integration tests --- tap_freshdesk/sync.py | 1 + tests/base.py | 2 +- tests/test_freshdesk_interrupted_sync.py | 157 +++++++++++++++ ...t_freshdesk_interrupted_sync_add_stream.py | 161 +++++++++++++++ ...reshdesk_interrupted_sync_remove_stream.py | 187 ++++++++++++++++++ 5 files changed, 507 insertions(+), 1 deletion(-) create mode 100644 tests/test_freshdesk_interrupted_sync.py create mode 100644 tests/test_freshdesk_interrupted_sync_add_stream.py create mode 100644 tests/test_freshdesk_interrupted_sync_remove_stream.py diff --git a/tap_freshdesk/sync.py b/tap_freshdesk/sync.py index 06b5a16..0ef1822 100644 --- a/tap_freshdesk/sync.py +++ b/tap_freshdesk/sync.py @@ -85,3 +85,4 @@ def sync(client, config, state, catalog): stream_obj.sync_obj(state, config["start_date"], client, catalog['streams'], selected_streams, streams_to_sync) singer.write_state(state) + update_currently_syncing(state, None) diff --git a/tests/base.py b/tests/base.py index ad10329..c7307b1 100644 --- a/tests/base.py +++ b/tests/base.py @@ -20,7 +20,7 @@ class FreshdeskBaseTest(unittest.TestCase): start_date = "" START_DATE_FORMAT = "%Y-%m-%dT00:00:00Z" # %H:%M:%SZ BOOKMARK_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" - + RECORD_REPLICATION_KEY_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" OBEYS_START_DATE = "obey-start-date" ####################################### diff --git a/tests/test_freshdesk_interrupted_sync.py b/tests/test_freshdesk_interrupted_sync.py new file mode 100644 index 0000000..f1ddf1a --- /dev/null +++ b/tests/test_freshdesk_interrupted_sync.py @@ -0,0 +1,157 @@ +from tap_tester import connections, runner, menagerie +from base import FreshdeskBaseTest + + +class TestFreshdeskInterruptedSync(FreshdeskBaseTest): + """Test tap's ability to recover from an interrupted sync""" + + @staticmethod + def name(): + return "tt_freshdesk_interrupted_sync_test" + + def get_properties(self): + """ + Maintain states for start_date and end_date + """ + return { + 'start_date' : '2021-10-01T00:00:00Z', + } + + def test_run(self): + """ + Testing that if a sync job is interrupted and state is saved with `currently_syncing`(stream), + the next sync job kicks off and the tap picks back up on that `currently_syncing` stream of `currently_syncing_repo`. 
+ """ + streams_to_test = {"roles", "agents", "groups", "companies"} + conn_id = connections.ensure_connection(self) + expected_replication_methods = self.expected_replication_method() + expected_replication_keys = self.expected_replication_keys() + + start_date = self.dt_to_ts(self.get_properties().get("start_date"), self.START_DATE_FORMAT) + + # Run a discovery job + found_catalogs = self.run_and_verify_check_mode(conn_id) + + # Partition catalogs for use in table/field selection + test_catalogs = [catalog for catalog in found_catalogs + if catalog.get('stream_name') in streams_to_test] + self.perform_and_verify_table_and_field_selection(conn_id, test_catalogs, select_all_fields=True) + + # Run a sync + self.run_and_verify_sync(conn_id) + + # Acquire records from the target output + full_sync_records = runner.get_records_from_target_output() + full_sync_state = menagerie.get_state(conn_id) + + # Set state in which all streams of one repo(singer-io/singer-python) have completed a sync. + # And one stream (pull_requests) of other repo(singer-io/test-repo) is syncing currently. + + interrupted_state = { + "currently_syncing": "roles", + "bookmarks": { + "agents": { + "updated_at": "2022-08-25T04:35:47.000000Z" + }, + "companies": { + "updated_at": "2022-08-22T13:58:07.000000Z" + }, + "groups": { + "updated_at": "2022-08-18T05:13:56.000000Z" + }, + "roles": { + "updated_at": "2022-07-19T11:49:58.000000Z" + } + } + } + + menagerie.set_state(conn_id, interrupted_state) + + # Run another sync + self.run_and_verify_sync(conn_id) + + # acquire records from target output + interrupted_sync_records = runner.get_records_from_target_output() + final_state = menagerie.get_state(conn_id) + currently_syncing = final_state.get('currently_syncing') + + # Checking resuming sync resulted in a successfully saved state + with self.subTest(): + + # Verify sync is not interrupted by checking currently_syncing in the state for sync + self.assertIsNone(currently_syncing) + + # Verify bookmarks are saved + self.assertIsNotNone(final_state.get('bookmarks')) + + # Verify final_state is equal to uninterrupted sync's state + # (This is what the value would have been without an interruption and proves resuming succeeds) + self.assertDictEqual(final_state, full_sync_state) + + full_sync_bookmark = full_sync_state["bookmarks"] + final_bookmark = final_state["bookmarks"] + interrupted_repo_bookmark = interrupted_state["bookmarks"] + + for stream in streams_to_test: + with self.subTest(stream=stream): + + # Expected values + expected_replication_method = expected_replication_methods[stream] + expected_primary_keys = list(self.expected_primary_keys()[stream]) + + # Gather results + full_records = [message['data'] for message in + full_sync_records.get(stream, {}).get('messages', [])] + full_record_count = len(full_records) + + interrupted_records = [message['data'] for message in + interrupted_sync_records.get(stream, {}).get('messages', [])] + interrupted_record_count = len(interrupted_records) + + if expected_replication_method == self.INCREMENTAL: + expected_replication_key = next(iter(expected_replication_keys[stream])) + + if stream in interrupted_repo_bookmark.keys(): + interrupted_bookmark = self.dt_to_ts(interrupted_repo_bookmark[stream]["updated_at"], self.BOOKMARK_FORMAT) + + if stream == interrupted_state['currently_syncing']: + + for record in interrupted_records: + rec_time = self.dt_to_ts(record[expected_replication_key], self.RECORD_REPLICATION_KEY_FORMAT) + self.assertGreaterEqual(rec_time, 
interrupted_bookmark) + + # Verify all interrupted recs are in full recs + self.assertIn(record, full_records, msg='incremental table record in interrupted sync not found in full sync') + + # Record count for all streams of interrupted sync match expectations + full_records_after_interrupted_bookmark = 0 + + for record in full_records: + rec_time = self.dt_to_ts(record[expected_replication_key], self.RECORD_REPLICATION_KEY_FORMAT) + self.assertGreaterEqual(rec_time, start_date) + + if (rec_time >= interrupted_bookmark): + full_records_after_interrupted_bookmark += 1 + + self.assertEqual(full_records_after_interrupted_bookmark, len(interrupted_records), \ + msg="Expected {} records in each sync".format(full_records_after_interrupted_bookmark)) + else: + # Verify we collected records that have the same replication value as a bookmark for streams that are already synced + self.assertGreaterEqual(interrupted_record_count, 0) + else: + # Verify resuming sync replicates all records that were found in the full sync (uninterrupted) + for record in interrupted_records: + with self.subTest(record_primary_key=record[expected_primary_keys[0]]): + self.assertIn(record, full_records, msg='Unexpected record replicated in resuming sync.') + for record in full_records: + with self.subTest(record_primary_key=record[expected_primary_keys[0]]): + self.assertIn(record, interrupted_records, msg='Record missing from resuming sync.' ) + else: + # Verify full table streams do not save bookmarked values at the conclusion of a successful sync + self.assertNotIn(stream, full_sync_bookmark.keys()) + self.assertNotIn(stream, final_bookmark.keys()) + + # Verify first and second sync have the same records + self.assertEqual(full_record_count, interrupted_record_count) + for rec in interrupted_records: + self.assertIn(rec, full_records, msg='full table record in interrupted sync not found in full sync') diff --git a/tests/test_freshdesk_interrupted_sync_add_stream.py b/tests/test_freshdesk_interrupted_sync_add_stream.py new file mode 100644 index 0000000..3b55c04 --- /dev/null +++ b/tests/test_freshdesk_interrupted_sync_add_stream.py @@ -0,0 +1,161 @@ +from tap_tester import connections, runner, menagerie +from base import FreshdeskBaseTest + + +class TestFreshdeskInterruptedSyncAddStream(FreshdeskBaseTest): + """Test tap's ability to recover from an interrupted sync""" + + @staticmethod + def name(): + return "tt_freshdesk_interrupted_sync_add_stream_test" + + def get_properties(self): + """ + Maintain states for start_date and end_date + """ + return { + 'start_date' : '2022-07-19T00:00:00Z' + } + + def test_run(self): + """ + Testing that if a sync job is interrupted and state is saved with `currently_syncing`(stream), + the next sync job kicks off and the tap picks back up on that `currently_syncing` stream.
+ - Verify behavior is consistent when an added stream is selected between initial and resuming sync + """ + streams_to_test = {"agents", "groups", "companies"} + conn_id = connections.ensure_connection(self) + expected_replication_methods = self.expected_replication_method() + expected_replication_keys = self.expected_replication_keys() + + start_date = self.dt_to_ts(self.get_properties().get("start_date"), self.START_DATE_FORMAT) + + # Run a discovery job + found_catalogs = self.run_and_verify_check_mode(conn_id) + + # Partition catalogs for use in table/field selection + test_catalogs = [catalog for catalog in found_catalogs + if catalog.get('stream_name') in streams_to_test] + self.perform_and_verify_table_and_field_selection(conn_id, test_catalogs, select_all_fields=True) + + # Run a sync + self.run_and_verify_sync(conn_id) + + # Acquire records from the target output + full_sync_records = runner.get_records_from_target_output() + full_sync_state = menagerie.get_state(conn_id) + + # Add a stream between syncs + added_stream = 'roles' + streams_to_test.add(added_stream) + test_catalogs = [catalog for catalog in found_catalogs + if catalog.get('stream_name') in streams_to_test] + # Add new stream to selected list + self.perform_and_verify_table_and_field_selection(conn_id, test_catalogs, select_all_fields=True) + + # Set state in which one stream (roles) is syncing currently. + + interrupted_state = { + "currently_syncing": "roles", + "bookmarks": { + "agents": { + "updated_at": "2022-08-25T04:35:47.000000Z" + }, + "companies": { + "updated_at": "2022-08-22T13:58:07.000000Z" + }, + "groups": { + "updated_at": "2022-08-18T05:13:56.000000Z" + } + } + } + + menagerie.set_state(conn_id, interrupted_state) + + # Run another sync + self.run_and_verify_sync(conn_id) + + # acquire records from target output + interrupted_sync_records = runner.get_records_from_target_output() + final_state = menagerie.get_state(conn_id) + currently_syncing = final_state.get('currently_syncing') + + # Checking resuming sync resulted in a successfully saved state + with self.subTest(): + + # Verify sync is not interrupted by checking currently_syncing in the state for sync + self.assertIsNone(currently_syncing) + + # Verify bookmarks are saved + self.assertIsNotNone(final_state.get('bookmarks')) + + full_sync_bookmark = full_sync_state["bookmarks"] + final_bookmark = final_state["bookmarks"] + interrupted_repo_bookmark = interrupted_state["bookmarks"] + + for stream in streams_to_test: + with self.subTest(stream=stream): + + # Expected values + expected_replication_method = expected_replication_methods[stream] + + # Gather results + if stream != added_stream: + full_records = [message['data'] for message in + full_sync_records.get(stream, {}).get('messages', [])] + full_record_count = len(full_records) + + interrupted_records = [message['data'] for message in + interrupted_sync_records.get(stream, {}).get('messages', [])] + interrupted_record_count = len(interrupted_records) + + if expected_replication_method == self.INCREMENTAL: + expected_replication_key = next(iter(expected_replication_keys[stream])) + + if stream in full_sync_bookmark.keys(): + full_sync_stream_bookmark = self.dt_to_ts(full_sync_bookmark.get(stream, {}).get("updated_at"), self.BOOKMARK_FORMAT) + final_sync_stream_bookmark = self.dt_to_ts(final_bookmark.get(stream, {}).get("updated_at"), self.BOOKMARK_FORMAT) + + if stream in interrupted_repo_bookmark.keys(): + interrupted_bookmark = 
self.dt_to_ts(interrupted_repo_bookmark[stream]["updated_at"], self.BOOKMARK_FORMAT) + + for record in interrupted_records: + rec_time = self.dt_to_ts(record[expected_replication_key], self.RECORD_REPLICATION_KEY_FORMAT) + self.assertGreaterEqual(rec_time, interrupted_bookmark) + + else: + # verify we collected records that have the same replication value as a bookmark for streams that are already synced + self.assertGreater(interrupted_record_count, 0) + + if stream != added_stream: + + # Verify state ends with the same value for common streams after both full and interrupted syncs + self.assertEqual(full_sync_stream_bookmark, final_sync_stream_bookmark) + + for record in interrupted_records: + + # Verify all interrupted recs are in full recs + self.assertIn(record, full_records, msg='incremental table record in interrupted sync not found in full sync') + + # Record count for all streams of interrupted sync match expectations + full_records_after_interrupted_bookmark = 0 + + for record in full_records: + rec_time = self.dt_to_ts(record[expected_replication_key], self.RECORD_REPLICATION_KEY_FORMAT) + self.assertGreater(rec_time, start_date, msg=f"{expected_replication_key} {stream} {record}") + + if (rec_time >= interrupted_bookmark): + full_records_after_interrupted_bookmark += 1 + + self.assertGreaterEqual(full_records_after_interrupted_bookmark, interrupted_record_count, \ + msg="Expected max {} records in each sync".format(full_records_after_interrupted_bookmark)) + + else: + # Verify full table streams do not save bookmarked values after a successful sync + self.assertNotIn(stream, full_sync_bookmark.keys()) + self.assertNotIn(stream, final_bookmark.keys()) + + # Verify first and second sync have the same records + self.assertEqual(full_record_count, interrupted_record_count) + for rec in interrupted_records: + self.assertIn(rec, full_records, msg='full table record in interrupted sync not found in full sync') \ No newline at end of file diff --git a/tests/test_freshdesk_interrupted_sync_remove_stream.py b/tests/test_freshdesk_interrupted_sync_remove_stream.py new file mode 100644 index 0000000..cff92e5 --- /dev/null +++ b/tests/test_freshdesk_interrupted_sync_remove_stream.py @@ -0,0 +1,187 @@ +from tap_tester import connections, runner, menagerie +from base import FreshdeskBaseTest + + +class TestFreshdeskInterruptedSyncRemoveStream(FreshdeskBaseTest): + """Test tap's ability to recover from an interrupted sync""" + + @staticmethod + def name(): + return "tt_freshdesk_interrupted_sync_remove_stream_test" + + def get_properties(self): + """ + Maintain states for start_date and end_date + """ + return { + 'start_date' : '2022-07-19T00:00:00Z' + } + + def test_run(self): + + # Test for removing any stream from state + self.run_interrupted_sync("groups") + + # Test for removing currently syncing stream from state + self.run_interrupted_sync("roles") + + def run_interrupted_sync(self, removed_stream): + """ + Testing that if a sync job is interrupted and state is saved with `currently_syncing`(stream), + the next sync job kicks off and the tap picks back up on that `currently_syncing` stream. + - Verify behavior is consistent when a stream is removed from the selected list between initial and resuming sync. 
+ """ + streams_to_test = {"roles", "agents", "groups", "companies"} + conn_id = connections.ensure_connection(self) + expected_replication_methods = self.expected_replication_method() + expected_replication_keys = self.expected_replication_keys() + + start_date = self.dt_to_ts(self.get_properties().get("start_date"), self.START_DATE_FORMAT) + + # Run a discovery job + found_catalogs = self.run_and_verify_check_mode(conn_id) + + # Partition catalogs for use in table/field selection + test_catalogs = [catalog for catalog in found_catalogs + if catalog.get('stream_name') in streams_to_test] + self.perform_and_verify_table_and_field_selection(conn_id, test_catalogs, select_all_fields=True) + + # Run a sync + self.run_and_verify_sync(conn_id) + + # Acquire records from target output + full_sync_records = runner.get_records_from_target_output() + full_sync_state = menagerie.get_state(conn_id) + + # Create new connection for another sync + conn_id_2 = connections.ensure_connection(self) + + # Add a stream between syncs + streams_to_test = streams_to_test - {removed_stream} + found_catalogs = self.run_and_verify_check_mode(conn_id_2) + + test_catalogs = [catalog for catalog in found_catalogs + if catalog.get('stream_name') in streams_to_test] + + # Add new stream to selected list + self.perform_and_verify_table_and_field_selection(conn_id_2, test_catalogs, select_all_fields=True) + + # Set state in with a currently syncing stream + + interrupted_state = { + "currently_syncing": "roles", + "bookmarks": { + "agents": { + "updated_at": "2022-08-25T04:35:47.000000Z" + }, + "companies": { + "updated_at": "2022-08-22T13:58:07.000000Z" + }, + "groups": { + "updated_at": "2022-08-18T05:13:56.000000Z" + }, + "roles": { + "updated_at": "2022-07-19T11:49:58.000000Z" + } + } + } + + menagerie.set_state(conn_id_2, interrupted_state) + + # Run another sync + self.run_and_verify_sync(conn_id_2) + + # Acquire records from target output + interrupted_sync_records = runner.get_records_from_target_output() + final_state = menagerie.get_state(conn_id_2) + currently_syncing = final_state.get('currently_syncing') + + # Checking resuming sync resulted in a successfully saved state + with self.subTest(): + + # Verify sync is not interrupted by checking currently_syncing in the state for sync + self.assertIsNone(currently_syncing) + + # Verify bookmarks are saved + self.assertIsNotNone(final_state.get('bookmarks')) + + + full_sync_bookmark = full_sync_state["bookmarks"] + final_bookmark = final_state["bookmarks"] + interrupted_repo_bookmark = interrupted_state["bookmarks"] + + for stream in list(streams_to_test) + [removed_stream]: + with self.subTest(stream=stream): + + # Expected values + expected_replication_method = expected_replication_methods[stream] + expected_primary_keys = list(self.expected_primary_keys()[stream]) + + # Gather results + full_records = [message['data'] for message in + full_sync_records.get(stream, {}).get('messages', []) ] + full_record_count = len(full_records) + + if stream != removed_stream: + interrupted_records = [message['data'] for message in + interrupted_sync_records.get(stream, {}).get('messages', [])] + interrupted_record_count = len(interrupted_records) + else: + self.assertNotIn(stream, interrupted_sync_records.keys()) + + if expected_replication_method == self.INCREMENTAL: + expected_replication_key = next(iter(expected_replication_keys[stream])) + full_sync_stream_bookmark = self.dt_to_ts(full_sync_bookmark.get(stream, {}).get("updated_at"), self.BOOKMARK_FORMAT) + + if stream in 
interrupted_repo_bookmark.keys(): + interrupted_bookmark = self.dt_to_ts(interrupted_repo_bookmark[stream]["updated_at"], self.BOOKMARK_FORMAT) + final_sync_stream_bookmark = self.dt_to_ts(final_bookmark.get(stream, {}).get("updated_at"), self.BOOKMARK_FORMAT) + + if stream != removed_stream: + + # Verify state ends with the same value for common streams after both full and interrupted syncs + self.assertEqual(full_sync_stream_bookmark, final_sync_stream_bookmark) + + # Verify resuming sync only replicates records with replication key values greater or equal to + # the interrupted_state for streams that were completed, replicated during the interrupted sync. + for record in interrupted_records: + with self.subTest(record_primary_key=record[expected_primary_keys[0]]): + rec_time = self.dt_to_ts(record[expected_replication_key], self.RECORD_REPLICATION_KEY_FORMAT) + self.assertGreaterEqual(rec_time, interrupted_bookmark) + + # Verify all interrupted recs are in full recs + self.assertIn(record, full_records, msg='Incremental table record in interrupted sync not found in full sync') + + # Record count for all streams of interrupted sync match expectations + full_records_after_interrupted_bookmark = 0 + for record in full_records: + rec_time = self.dt_to_ts(record[expected_replication_key], self.RECORD_REPLICATION_KEY_FORMAT) + self.assertGreater(rec_time, start_date, msg=f"{expected_replication_key} {stream} {record}") + + if (rec_time >= interrupted_bookmark): + full_records_after_interrupted_bookmark += 1 + + self.assertGreaterEqual(full_records_after_interrupted_bookmark, interrupted_record_count, \ + msg="Expected max {} records in each sync".format(full_records_after_interrupted_bookmark)) + else: + # Verify the bookmark has not advanced for the removed stream + self.assertEqual(final_sync_stream_bookmark, interrupted_bookmark) + else: + # verify we collected records that have the same replication value as a bookmark for streams that are already synced + self.assertGreater(interrupted_record_count, 0) + + else: + # Verify full table streams do not save bookmarked values after a successful sync + self.assertNotIn(stream, full_sync_bookmark.keys()) + self.assertNotIn(stream, final_bookmark.keys()) + + # Verify first and second sync have the same records + self.assertEqual(full_record_count, interrupted_record_count) + for rec in interrupted_records: + self.assertIn(rec, full_records, msg='Full table record in interrupted sync not found in full sync') + + # Verify at least 1 record was replicated for each stream + if stream != removed_stream: + self.assertGreater(interrupted_record_count, 0) + + print(f"{stream} resumed sync records replicated: {interrupted_record_count}") \ No newline at end of file From 1229a765e1a276fa88395c6bcc1b59bc6372d1f6 Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Fri, 26 Aug 2022 12:33:25 +0000 Subject: [PATCH 22/32] increased code coverage --- tap_freshdesk/client.py | 29 +++++++++-- tap_freshdesk/utils.py | 93 ---------------------------------- tests/unittests/test_client.py | 53 +++++++++++++++++++ 3 files changed, 79 insertions(+), 96 deletions(-) delete mode 100644 tap_freshdesk/utils.py create mode 100644 tests/unittests/test_client.py diff --git a/tap_freshdesk/client.py b/tap_freshdesk/client.py index 4570682..3bf4220 100644 --- a/tap_freshdesk/client.py +++ b/tap_freshdesk/client.py @@ -1,14 +1,37 @@ import time - import backoff import requests import singer -from tap_freshdesk import utils +import collections +import functools LOGGER = 
singer.get_logger() BASE_URL = "https://{}.freshdesk.com" +def ratelimit(limit, every): + """ + Keeps minimum seconds(every) of time between two request calls. + """ + def limitdecorator(fn): + times = collections.deque() + + @functools.wraps(fn) + def wrapper(*args, **kwargs): + if len(times) >= limit: + t0 = times.pop() # Takes last call time + t = time.time() # current time + sleep_time = every - (t - t0) # If difference is < every(time) + if sleep_time > 0: # Sleep for remaining time + time.sleep(sleep_time) + + times.appendleft(time.time()) # Appending current time to list + return fn(*args, **kwargs) + + return wrapper + + return limitdecorator + class FreshdeskClient: """ The client class is used for making REST calls to the Freshdesk API. @@ -38,7 +61,7 @@ def check_access_token(self): max_tries=5, giveup=lambda e: e.response is not None and 400 <= e.response.status_code < 500, factor=2) - @utils.ratelimit(1, 2) + @ratelimit(1, 2) def request(self, url, params=None): """ Call rest API and return the response in case of status code 200. diff --git a/tap_freshdesk/utils.py b/tap_freshdesk/utils.py deleted file mode 100644 index 79d61fd..0000000 --- a/tap_freshdesk/utils.py +++ /dev/null @@ -1,93 +0,0 @@ -import argparse -import collections -import datetime -import functools -import json -import os -import time - -DATETIME_FMT = "%Y-%m-%dT%H:%M:%SZ" - - -def strptime(dt): - return datetime.datetime.strptime(dt, DATETIME_FMT) - - -def strftime(dt): - return dt.strftime(DATETIME_FMT) - - -def ratelimit(limit, every): - def limitdecorator(fn): - times = collections.deque() - - @functools.wraps(fn) - def wrapper(*args, **kwargs): - if len(times) >= limit: - t0 = times.pop() - t = time.time() - sleep_time = every - (t - t0) - if sleep_time > 0: - time.sleep(sleep_time) - - times.appendleft(time.time()) - return fn(*args, **kwargs) - - return wrapper - - return limitdecorator - - -def chunk(l, n): - for i in range(0, len(l), n): - yield l[i:i + n] - - -def get_abs_path(path): - return os.path.join(os.path.dirname(os.path.realpath(__file__)), path) - - -def load_json(path): - with open(path) as f: # pylint: disable=unspecified-encoding - return json.load(f) - - -def load_schema(entity): - return load_json(get_abs_path("schemas/{}.json".format(entity))) - - -def update_state(state, entity, dt): - if dt is None: - return - - if isinstance(dt, datetime.datetime): - dt = strftime(dt) - - if entity not in state: - state[entity] = dt - - if dt >= state[entity]: - state[entity] = dt - - -def parse_args(required_config_keys): - parser = argparse.ArgumentParser() - parser.add_argument('-c', '--config', help='Config file', required=True) - parser.add_argument('-s', '--state', help='State file') - args = parser.parse_args() - - config = load_json(args.config) - check_config(config, required_config_keys) - - if args.state: - state = load_json(args.state) - else: - state = {} - - return config, state - - -def check_config(config, required_keys): - missing_keys = [key for key in required_keys if key not in config] - if missing_keys: - raise Exception("Config is missing required keys: {}".format(missing_keys)) diff --git a/tests/unittests/test_client.py b/tests/unittests/test_client.py new file mode 100644 index 0000000..28e687c --- /dev/null +++ b/tests/unittests/test_client.py @@ -0,0 +1,53 @@ +import unittest +from unittest import mock +from tap_freshdesk import client +import requests +import json + +def get_response(status_code, json_resp={}, headers = None): + """ + Returns mock response + """ + 
response = requests.Response() + response.status_code = status_code + response._content = json.dumps(json_resp).encode() + if headers: + response.headers = headers + return response + + +class TestAccessToken(unittest.TestCase): + """ + Test `check_access_token` method of client class + """ + + @mock.patch("tap_freshdesk.client.FreshdeskClient.request") + def test_access_token(self, mock_request): + """ + Test that to check access token a request call is made. + """ + config = {"domain": "sampleDomain"} + _client = client.FreshdeskClient(config) + _client.check_access_token() + + # Verify that for check access token, `request` method was called + self.assertTrue(mock_request.called) + mock_request.assert_called_with("https://sampleDomain.freshdesk.com/api/v2/roles", mock.ANY) + + +class TestRateLimit(unittest.TestCase): + """Test `ratelimit` decorator.""" + + @mock.patch("requests.Session.send", return_value = get_response(200)) + @mock.patch("time.sleep") + def test_ratelimit(self, mock_sleep, mock_request): + """ + Test that for consecutive request calls `time.sleep` is called, + if requests calls are made in a very short time(2 seconds). + """ + _client = client.FreshdeskClient({"api_key": "API_KEY"}) + for _ in range(10): + _client.request("https://SAMPLE.URL") + + # Verify that `time.sleep` was called + self.assertTrue(mock_sleep.called) From d16cc368f69ebd7bcb825a4e8effe946aecab68a Mon Sep 17 00:00:00 2001 From: prijendev Date: Mon, 29 Aug 2022 19:00:29 +0530 Subject: [PATCH 23/32] Resolved review comments. --- tap_freshdesk/schema.py | 4 +-- tap_freshdesk/streams.py | 67 ++++++++++++++++++++-------------------- 2 files changed, 36 insertions(+), 35 deletions(-) diff --git a/tap_freshdesk/schema.py b/tap_freshdesk/schema.py index 9b1baa8..8d33c3d 100644 --- a/tap_freshdesk/schema.py +++ b/tap_freshdesk/schema.py @@ -27,11 +27,12 @@ def get_schemas(): schemas[stream_name] = schema schema = singer.resolve_schema_references(schema, refs) + replication_keys = (hasattr(stream_metadata, 'replication_keys') or None) and stream_metadata.replication_keys mdata = metadata.new() mdata = metadata.get_standard_metadata( schema=schema, key_properties = (hasattr(stream_metadata, 'key_properties') or None) and stream_metadata.key_properties, - valid_replication_keys = (hasattr(stream_metadata, 'replication_keys') or None) and stream_metadata.replication_keys, + valid_replication_keys = replication_keys, replication_method = (hasattr(stream_metadata, 'replication_method') or None) and stream_metadata.replication_method ) mdata = metadata.to_map(mdata) @@ -39,7 +40,6 @@ def get_schemas(): # Loop through all keys and make replication keys of automatic inclusion for field_name in schema['properties'].keys(): - replication_keys = (hasattr(stream_metadata, 'replication_keys') or None) and stream_metadata.replication_keys if replication_keys and field_name in replication_keys: mdata = metadata.write(mdata, ('properties', field_name), 'inclusion', 'automatic') diff --git a/tap_freshdesk/streams.py b/tap_freshdesk/streams.py index 4fbc908..d8db0c7 100644 --- a/tap_freshdesk/streams.py +++ b/tap_freshdesk/streams.py @@ -17,10 +17,12 @@ def get_min_bookmark(stream, streams_to_sync, start_date, state, bookmark_key, p stream_obj = STREAMS[stream]() min_bookmark = dt.strftime(dt.now(), DATETIME_FMT) if stream in streams_to_sync: + # Get minimum of stream's bookmark(start date in case of no bookmark) and min_bookmark if predefined_filter: stream = stream + '_' + predefined_filter min_bookmark = min(min_bookmark, 
get_bookmark(state, stream, bookmark_key, start_date)) + # Iterate through all children and return minimum bookmark among all. for child in filter(lambda x: x in streams_to_sync, stream_obj.children): min_bookmark = min(min_bookmark, get_min_bookmark(child, streams_to_sync, start_date, state, bookmark_key)) @@ -49,9 +51,9 @@ class Stream: Base class representing tap-freshdesk streams. """ tap_stream_id = None - replication_method = None - replication_keys = [] - key_properties = [] + replication_method = 'INCREMENTAL' + replication_keys = ['updated_at'] + key_properties = ['id'] endpoint = None filter_param = False children = [] @@ -101,7 +103,6 @@ def write_records(self, catalog, state, selected_streams, start_date, data, max_ bookmark = get_bookmark(state, stream_id, self.replication_keys[0], start_date) # The max bookmark so far for the child stream child_max_bookmark = None - # child_max_bookmarks = {} with singer.metrics.record_counter(self.tap_stream_id) as counter: with singer.Transformer() as transformer: @@ -109,7 +110,7 @@ def write_records(self, catalog, state, selected_streams, start_date, data, max_ stream_metadata = singer.metadata.to_map(stream_catalog['metadata']) for row in data: if self.tap_stream_id in selected_streams and row[self.replication_keys[0]] >= bookmark: - # Custom fields are expected to be strings, but sometimes the API sends + # Custom fields are expected to be strings, but sometimes the API sends # booleans. We cast those to strings to match the schema. if 'custom_fields' in row: row['custom_fields'] = self.transform_dict(row['custom_fields'], force_str=self.force_str) @@ -170,31 +171,31 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams class Agents(Stream): + """ + https://developer.freshdesk.com/api/#list_all_agents + """ tap_stream_id = 'agents' - key_properties = ['id'] - replication_keys = ['updated_at'] - replication_method = 'INCREMENTAL' path = 'agents' class Companies(Stream): + """ + https://developer.freshdesk.com/api/#list_all_companies + """ tap_stream_id = 'companies' - key_properties = ['id'] - replication_keys = ['updated_at'] - replication_method = 'INCREMENTAL' path = 'companies' class Groups(Stream): + """ + https://developer.freshdesk.com/api/#list_all_groups + """ tap_stream_id = 'groups' - key_properties = ['id'] - replication_keys = ['updated_at'] - replication_method = 'INCREMENTAL' path = 'groups' class Roles(Stream): + """ + https://developer.freshdesk.com/api/#list_all_roles + """ tap_stream_id = 'roles' - key_properties = ['id'] - replication_keys = ['updated_at'] - replication_method = 'INCREMENTAL' path = 'roles' class DateFilteredStream(Stream): @@ -221,17 +222,17 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams singer.write_bookmark(state, child, "updated_at", bm) class Tickets(DateFilteredStream): + """ + https://developer.freshdesk.com/api/#list_all_tickets + """ tap_stream_id = 'tickets' - key_properties = ['id'] - replication_keys = ['updated_at'] - replication_method = 'INCREMENTAL' path = 'tickets' children = ['conversations', 'satisfaction_ratings', 'time_entries'] id_key = 'id' date_filter = 'updated_since' params = { "per_page": PAGE_SIZE, - 'order_by': replication_keys[0], + 'order_by': "updated_at", 'order_type': "asc", 'include': "requester,company,stats" } @@ -239,10 +240,10 @@ class Tickets(DateFilteredStream): filters = [None, 'deleted', 'spam'] class Contacts(DateFilteredStream): + """ + https://developer.freshdesk.com/api/#list_all_contacts 
+ """ tap_stream_id = 'contacts' - key_properties = ['id'] - replication_keys = ['updated_at'] - replication_method = 'INCREMENTAL' path = 'contacts' id_key = 'id' date_filter = '_updated_since' @@ -277,27 +278,27 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams return max_bookmark class Conversations(ChildStream): + """ + https://developer.freshdesk.com/api/#list_all_ticket_notes + """ tap_stream_id = 'conversations' - key_properties = ['id'] - replication_keys = ['updated_at'] - replication_method = 'INCREMENTAL' path = 'tickets/{}/conversations' parent = 'tickets' class SatisfactionRatings(ChildStream): + """ + https://developer.freshdesk.com/api/#view_ticket_satisfaction_ratings + """ tap_stream_id = 'satisfaction_ratings' - key_properties = ['id'] - replication_keys = ['updated_at'] - replication_method = 'INCREMENTAL' path = 'tickets/{}/satisfaction_ratings' parent = 'tickets' class TimeEntries(ChildStream): + """ + https://developer.freshdesk.com/api/#list_all_ticket_timeentries + """ tap_stream_id = 'time_entries' - key_properties = ['id'] - replication_keys = ['updated_at'] - replication_method = 'INCREMENTAL' path = 'tickets/{}/time_entries' parent = 'tickets' From 69cb3abbf81ea6e224cb2c1a699927603cecc880 Mon Sep 17 00:00:00 2001 From: prijendev Date: Tue, 30 Aug 2022 12:13:41 +0530 Subject: [PATCH 24/32] Resolved pylint errors. --- tap_freshdesk/client.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tap_freshdesk/client.py b/tap_freshdesk/client.py index 3bf4220..c020911 100644 --- a/tap_freshdesk/client.py +++ b/tap_freshdesk/client.py @@ -1,9 +1,10 @@ import time +import collections +import functools import backoff import requests import singer -import collections -import functools + LOGGER = singer.get_logger() BASE_URL = "https://{}.freshdesk.com" From 805c83015fe29b8d8f2ca25823942167e971a219 Mon Sep 17 00:00:00 2001 From: namrata270998 Date: Tue, 6 Sep 2022 07:11:37 +0000 Subject: [PATCH 25/32] used singer utils ratelimit and resolved review comments --- tap_freshdesk/client.py | 28 ++-------------------------- tap_freshdesk/sync.py | 9 ++++++--- tests/unittests/test_client.py | 18 ------------------ tests/unittests/test_sync.py | 6 +++--- 4 files changed, 11 insertions(+), 50 deletions(-) diff --git a/tap_freshdesk/client.py b/tap_freshdesk/client.py index c020911..e914564 100644 --- a/tap_freshdesk/client.py +++ b/tap_freshdesk/client.py @@ -1,38 +1,14 @@ import time -import collections -import functools import backoff import requests import singer +from singer import utils LOGGER = singer.get_logger() BASE_URL = "https://{}.freshdesk.com" -def ratelimit(limit, every): - """ - Keeps minimum seconds(every) of time between two request calls. - """ - def limitdecorator(fn): - times = collections.deque() - - @functools.wraps(fn) - def wrapper(*args, **kwargs): - if len(times) >= limit: - t0 = times.pop() # Takes last call time - t = time.time() # current time - sleep_time = every - (t - t0) # If difference is < every(time) - if sleep_time > 0: # Sleep for remaining time - time.sleep(sleep_time) - - times.appendleft(time.time()) # Appending current time to list - return fn(*args, **kwargs) - - return wrapper - - return limitdecorator - class FreshdeskClient: """ The client class is used for making REST calls to the Freshdesk API. 
@@ -62,7 +38,7 @@ def check_access_token(self): max_tries=5, giveup=lambda e: e.response is not None and 400 <= e.response.status_code < 500, factor=2) - @ratelimit(1, 2) + @utils.ratelimit(1, 2) def request(self, url, params=None): """ Call rest API and return the response in case of status code 200. diff --git a/tap_freshdesk/sync.py b/tap_freshdesk/sync.py index 0ef1822..555d2e5 100644 --- a/tap_freshdesk/sync.py +++ b/tap_freshdesk/sync.py @@ -54,11 +54,14 @@ def get_ordered_stream_list(currently_syncing, streams_to_sync): def get_stream_to_sync(selected_streams): """ - Get the streams for which the sync function should be called(the parent in case of selected child streams). + Get the streams for which the sync function should be called + (the parent in case of selected child streams). """ streams_to_sync = [] for stream_name, stream_obj in STREAMS.items(): - if (stream_name in selected_streams) or any(child in selected_streams for child in stream_obj.children): + if ((stream_name in selected_streams) or + any(child in selected_streams for child in stream_obj.children)) and ( + stream_obj.parent is None): streams_to_sync.append(stream_name) return streams_to_sync @@ -76,7 +79,7 @@ def sync(client, config, state, catalog): singer.write_state(state) currently_syncing = singer.get_currently_syncing(state) streams_to_sync = get_ordered_stream_list(currently_syncing, streams_to_sync) - for stream in filter(lambda x: STREAMS[x]().parent is None, streams_to_sync): + for stream in streams_to_sync: stream_obj = STREAMS[stream]() write_schemas(stream, catalog, selected_streams) diff --git a/tests/unittests/test_client.py b/tests/unittests/test_client.py index 28e687c..9389615 100644 --- a/tests/unittests/test_client.py +++ b/tests/unittests/test_client.py @@ -33,21 +33,3 @@ def test_access_token(self, mock_request): # Verify that for check access token, `request` method was called self.assertTrue(mock_request.called) mock_request.assert_called_with("https://sampleDomain.freshdesk.com/api/v2/roles", mock.ANY) - - -class TestRateLimit(unittest.TestCase): - """Test `ratelimit` decorator.""" - - @mock.patch("requests.Session.send", return_value = get_response(200)) - @mock.patch("time.sleep") - def test_ratelimit(self, mock_sleep, mock_request): - """ - Test that for consecutive request calls `time.sleep` is called, - if requests calls are made in a very short time(2 seconds). - """ - _client = client.FreshdeskClient({"api_key": "API_KEY"}) - for _ in range(10): - _client.request("https://SAMPLE.URL") - - # Verify that `time.sleep` was called - self.assertTrue(mock_sleep.called) diff --git a/tests/unittests/test_sync.py b/tests/unittests/test_sync.py index 5d2bb38..17da4fc 100644 --- a/tests/unittests/test_sync.py +++ b/tests/unittests/test_sync.py @@ -101,13 +101,13 @@ class TestGetStreamsToSync(unittest.TestCase): @parameterized.expand([ ['test_parent_selected', ["tickets"], ["tickets"]], - ['test_child_selected', ["conversations", "satisfaction_ratings"], ["conversations", "satisfaction_ratings", "tickets"]], - ['test_both_selected', ["conversations", "roles", "tickets"], ["conversations", "roles", "tickets"]] + ['test_child_selected', ["conversations", "satisfaction_ratings"], ["tickets"]], + ['test_both_selected', ["conversations", "roles", "tickets"], ["roles", "tickets"]] ]) def test_sync_streams(self, name, selected_streams, expected_streams): """ Test that if an only child is selected in the catalog, - then `get_stream_to_sync` returns the parent stream also. 
+ then `get_stream_to_sync` returns the parent streams if selected stream is child. """ sync_streams = get_stream_to_sync(selected_streams) From 042885ae5f6cf4152503c73fb23d4517c45b57d3 Mon Sep 17 00:00:00 2001 From: prijendev Date: Mon, 12 Sep 2022 14:16:08 +0530 Subject: [PATCH 26/32] Resolved pylint error. --- tap_freshdesk/client.py | 237 +++++++++++++++++++++++++++++++++++----- 1 file changed, 211 insertions(+), 26 deletions(-) diff --git a/tap_freshdesk/client.py b/tap_freshdesk/client.py index e914564..f111a36 100644 --- a/tap_freshdesk/client.py +++ b/tap_freshdesk/client.py @@ -1,63 +1,248 @@ -import time + import backoff import requests -import singer +import pendulum from singer import utils - +import singer +import time LOGGER = singer.get_logger() -BASE_URL = "https://{}.freshdesk.com" +BASE_ID_URL = "https://id.getharvest.com/api/v2/" +BASE_API_URL = "https://api.harvestapp.com/v2/" +# timeout request after 300 seconds +REQUEST_TIMEOUT = 300 + +class HarvestError(Exception): + pass + +class Server5xxError(Exception): + pass + +class HarvestBadRequestError(HarvestError): + pass + +class HarvestUnauthorizedError(HarvestError): + pass + +class HarvestNotFoundError(HarvestError): + pass + +class HarvestForbiddenError(HarvestError): + pass + +class HarvestUnprocessableEntityError(HarvestError): + pass + +class HarvestRateLimitExceeededError(HarvestError): + pass -class FreshdeskClient: +class HarvestInternalServiceError(Server5xxError): + pass + +ERROR_CODE_EXCEPTION_MAPPING = { + 400: { + "raise_exception": HarvestBadRequestError, + "message": "The request is missing or has a bad parameter." + }, + 401: { + "raise_exception": HarvestUnauthorizedError, + "message": "Invalid authorization credentials." + }, + 403: { + "raise_exception": HarvestForbiddenError, + "message": "User does not have permission to access the resource or "\ + "related feature is disabled." + }, + 404: { + "raise_exception": HarvestNotFoundError, + "message": "The resource you have specified cannot be found." + }, + 422: { + "raise_exception": HarvestUnprocessableEntityError, + "message": "The request was not able to process right now." + }, + 429: { + "raise_exception": HarvestRateLimitExceeededError, + "message": "API rate limit exceeded." + }, + 500: { + "raise_exception": HarvestInternalServiceError, + "message": "An error has occurred at Harvest's end." + } +} + +def raise_for_error(response): """ - The client class is used for making REST calls to the Freshdesk API. + Forming a custom response message for raising an exception. + """ + + error_code = response.status_code + try: + response_json = response.json() + except Exception: + response_json = {} + + if error_code not in ERROR_CODE_EXCEPTION_MAPPING and error_code > 500: + # Raise `Server5xxError` for all 5xx unknown error + exc = Server5xxError + else: + exc = ERROR_CODE_EXCEPTION_MAPPING.get(error_code, {}).get("raise_exception", HarvestError) + error_message = response_json.get("error_description", ERROR_CODE_EXCEPTION_MAPPING.get( + error_code, {}).get("message", "An Unknown Error occurred.")) + message = "HTTP-error-code: {}, Error: {}".format(error_code, error_message) + + raise exc(message) from None + +class HarvestClient: #pylint: disable=too-many-instance-attributes + """ + The client class is used for making REST calls to the Harvest API. 
""" def __init__(self, config): self.config = config + self._client_id = config['client_id'] + self._client_secret = config['client_secret'] + self._refresh_token = config['refresh_token'] + self._user_agent = config['user_agent'] + self._account_id = None self.session = requests.Session() - self.base_url = BASE_URL.format(config.get("domain")) + self._access_token = None + self._expires_at = None + self.request_timeout = self.get_request_timeout() def __enter__(self): - self.check_access_token() - return self + self._refresh_access_token() def __exit__(self, exception_type, exception_value, traceback): - # Kill the session instance. self.session.close() - def check_access_token(self): + def get_request_timeout(self): """ - Check if the access token is valid. + Get timeout value from config, if the value is passed. + Else return the default value. """ - self.request(self.base_url+"/api/v2/roles", {"per_page": 1, "page": 1}) + # Get `request_timeout` value from config. + config_request_timeout = self.config.get('request_timeout') + + # If timeout is not passed in the config then set it to the default(300 seconds) + if config_request_timeout is None: + return REQUEST_TIMEOUT + + # If config request_timeout is other than 0,"0" or invalid string then use request_timeout + if ((type(config_request_timeout) in [int, float]) or + (isinstance(config_request_timeout,str) and config_request_timeout.replace('.', '', 1).isdigit())) and float(config_request_timeout): + return float(config_request_timeout) + raise Exception("The entered timeout is invalid, it should be a valid none-zero integer.") @backoff.on_exception(backoff.expo, - (requests.exceptions.RequestException), + (HarvestRateLimitExceeededError, Server5xxError, + requests.Timeout, requests.ConnectionError), max_tries=5, - giveup=lambda e: e.response is not None and 400 <= e.response.status_code < 500, factor=2) - @utils.ratelimit(1, 2) - def request(self, url, params=None): + def _refresh_access_token(self): """ - Call rest API and return the response in case of status code 200. + Create an access token using the refresh token. """ - headers = {} - if 'user_agent' in self.config: - headers['User-Agent'] = self.config['user_agent'] + LOGGER.info("Refreshing access token") + resp = self.session.request('POST', + url=BASE_ID_URL + 'oauth2/token', + data={ + 'client_id': self._client_id, + 'client_secret': self._client_secret, + 'refresh_token': self._refresh_token, + 'grant_type': 'refresh_token', + }, + headers={"User-Agent": self._user_agent}) - req = requests.Request('GET', url, params=params, auth=(self.config['api_key'], ""), headers=headers).prepare() - LOGGER.info("GET %s", req.url) - response = self.session.send(req) + expires_in_seconds = resp.json().get('expires_in', 17 * 60 * 60) + self._expires_at = pendulum.now().add(seconds=expires_in_seconds) + resp_json = {} + try: + resp_json = resp.json() + self._access_token = resp_json['access_token'] + # If an access token is not provided in response, raise an error + except KeyError: + if resp_json.get('error'): + LOGGER.critical(resp_json.get('error')) + if resp_json.get('error_description'): + LOGGER.critical(resp_json.get('error_description')) + raise_for_error(resp) + LOGGER.info("Got refreshed access token") + + def get_access_token(self): + """ + Return access token if available or generate one. 
+ """ + if self._access_token is not None and self._expires_at > pendulum.now(): + return self._access_token + + self._refresh_access_token() + return self._access_token + + @backoff.on_exception(backoff.expo, + (HarvestRateLimitExceeededError, Server5xxError, + requests.Timeout, requests.ConnectionError), + max_tries=5, + factor=2) + def get_account_id(self): + """ + Get the account Id of the Active Harvest account. + It will throw an exception if no active harvest account is found. + """ + if self._account_id is not None: + return self._account_id + + response = self.session.request('GET', + url=BASE_ID_URL + 'accounts', + headers={'Authorization': 'Bearer ' + self._access_token, + 'User-Agent': self._user_agent}, + timeout=self.request_timeout) # Call the function again if the rate limit is exceeded if 'Retry-After' in response.headers: retry_after = int(response.headers['Retry-After']) LOGGER.info("Rate limit reached. Sleeping for %s seconds", retry_after) time.sleep(retry_after) + return self.get_account_id() + + if response.status_code != 200: + raise_for_error(response) + + if response.json().get('accounts'): + self._account_id = str(response.json()['accounts'][0]['id']) + return self._account_id + + raise Exception("No Active Harvest Account found") from None + + @backoff.on_exception(backoff.expo, + (HarvestRateLimitExceeededError, Server5xxError, + requests.Timeout, requests.ConnectionError), + max_tries=5, + factor=2) + @utils.ratelimit(100, 15) + def request(self, url, params=None): + """ + Call rest API and return the response in case of status code 200. + """ + params = params or {} + access_token = self.get_access_token() + headers = {"Accept": "application/json", + "Harvest-Account-Id": self.get_account_id(), + "Authorization": "Bearer " + access_token, + "User-Agent": self._user_agent} + req = requests.Request("GET", url=url, params=params, headers=headers).prepare() + LOGGER.info("GET %s", req.url) + resp = self.session.send(req, timeout=self.request_timeout) + + # Call the function again if the rate limit is exceeded + if 'Retry-After' in resp.headers: + retry_after = int(resp.headers['Retry-After']) + LOGGER.info("Rate limit reached. Sleeping for %s seconds", retry_after) + time.sleep(retry_after) return self.request(url, params) - response.raise_for_status() + if resp.status_code != 200: + raise_for_error(resp) - return response.json() + return resp.json() From f409ae66b6eec6ae6561df605f5e8c78eb27715b Mon Sep 17 00:00:00 2001 From: prijendev Date: Mon, 12 Sep 2022 14:35:58 +0530 Subject: [PATCH 27/32] Resolved parent child sync issue. --- tap_freshdesk/streams.py | 17 ++++++++++------- tap_freshdesk/sync.py | 17 +++++++++++++---- tests/unittests/test_streams.py | 33 ++++++++++++++++++++++++++++++++- 3 files changed, 55 insertions(+), 12 deletions(-) diff --git a/tap_freshdesk/streams.py b/tap_freshdesk/streams.py index d8db0c7..32b10a7 100644 --- a/tap_freshdesk/streams.py +++ b/tap_freshdesk/streams.py @@ -9,22 +9,22 @@ DATETIME_FMT = "%Y-%m-%dT%H:%M:%SZ" -def get_min_bookmark(stream, streams_to_sync, start_date, state, bookmark_key, predefined_filter=None): +def get_min_bookmark(stream, selected_streams, bookmark, start_date, state, bookmark_key, predefined_filter=None): """ Get the minimum bookmark from the parent and its corresponding child bookmarks. 
""" stream_obj = STREAMS[stream]() - min_bookmark = dt.strftime(dt.now(), DATETIME_FMT) - if stream in streams_to_sync: + min_bookmark = bookmark + if stream in selected_streams: # Get minimum of stream's bookmark(start date in case of no bookmark) and min_bookmark if predefined_filter: stream = stream + '_' + predefined_filter min_bookmark = min(min_bookmark, get_bookmark(state, stream, bookmark_key, start_date)) # Iterate through all children and return minimum bookmark among all. - for child in filter(lambda x: x in streams_to_sync, stream_obj.children): - min_bookmark = min(min_bookmark, get_min_bookmark(child, streams_to_sync, start_date, state, bookmark_key)) + for child in stream_obj.children: + min_bookmark = min(min_bookmark, get_min_bookmark(child, selected_streams, bookmark, start_date, state, bookmark_key)) return min_bookmark @@ -143,8 +143,9 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams LOGGER.info("Syncing %s with filter %s", self.tap_stream_id, predefined_filter) params[self.filter_keyword] = predefined_filter + current_time = dt.strftime(dt.now(), DATETIME_FMT) # Get the minimum bookmark from the parent and the child streams - min_bookmark = get_min_bookmark(self.tap_stream_id, streams_to_sync, start_date, state, self.replication_keys[0], predefined_filter) + min_bookmark = get_min_bookmark(self.tap_stream_id, selected_streams, current_time, start_date, state, self.replication_keys[0], predefined_filter) max_bookmark = min_bookmark # Initialize the child_max_bookmarks dictionary child_max_bookmarks = {} @@ -260,8 +261,10 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams params = copy.deepcopy(self.params) # Build the url for the request full_url = self.build_url(client.base_url, self.parent_id) + + current_time = dt.strftime(dt.now(), DATETIME_FMT) # Get the min bookmark from the parent and the child streams - min_bookmark = get_min_bookmark(self.tap_stream_id, streams_to_sync, start_date, state, self.replication_keys[0], None) + min_bookmark = get_min_bookmark(self.tap_stream_id, selected_streams, current_time, start_date, state, self.replication_keys[0], None) max_bookmark = min_bookmark params['page'] = 1 self.paginate = True diff --git a/tap_freshdesk/sync.py b/tap_freshdesk/sync.py index 555d2e5..92521bb 100644 --- a/tap_freshdesk/sync.py +++ b/tap_freshdesk/sync.py @@ -58,11 +58,20 @@ def get_stream_to_sync(selected_streams): (the parent in case of selected child streams). 
""" streams_to_sync = [] - for stream_name, stream_obj in STREAMS.items(): - if ((stream_name in selected_streams) or - any(child in selected_streams for child in stream_obj.children)) and ( - stream_obj.parent is None): + + # Loop thru all selected streams + for stream_name in selected_streams: + stream_obj = STREAMS[stream_name] + # If the stream has a parent_stream, then it is a child stream + parent_stream = hasattr(stream_obj, 'parent') and stream_obj.parent + + # Append selected parent streams + if not parent_stream: streams_to_sync.append(stream_name) + else: + # Append un-selected parent streams of selected children + if parent_stream not in selected_streams and parent_stream not in streams_to_sync: + streams_to_sync.append(parent_stream) return streams_to_sync def sync(client, config, state, catalog): diff --git a/tests/unittests/test_streams.py b/tests/unittests/test_streams.py index 789bd6c..5c81d34 100644 --- a/tests/unittests/test_streams.py +++ b/tests/unittests/test_streams.py @@ -1,8 +1,9 @@ import unittest from unittest import mock from parameterized import parameterized -from tap_freshdesk.streams import Agents, Tickets +from tap_freshdesk.streams import Agents, Tickets, get_min_bookmark +START_DATE = '2022-09-00T00:00:00.000000Z' class TestSyncObj(unittest.TestCase): """ @@ -145,3 +146,33 @@ def test_transform(self, name, dictionary, expected_list, kwargs): # Verify returned list is expected self.assertEqual(returned_list, expected_list) + +class TestStreamsUtils(unittest.TestCase): + """ + Test utility functions of streams module. + """ + + @parameterized.expand([ + ['test_parent_only_with_state', ['tickets'], {'bookmarks': {'tickets': {'updated_at': '2022-08-30T00:00:00.000000Z'}}}, '2022-08-30T00:00:00.000000Z'], + ['test_child_only_with_state', ['conversations'], {'bookmarks': {'conversations': {'updated_at': '2022-08-30T00:00:00.000000Z'}}}, '2022-08-30T00:00:00.000000Z'], + ['test_parent_only_without_state', ['tickets'], {}, START_DATE], + ['test_child_only_without_state', ['tickets'], {}, START_DATE], + ['test_min_parent_bookmark_single_child', ['tickets', 'conversations'], + {'bookmarks': {'tickets': {'updated_at': '2022-07-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-08-30T00:00:00.000000Z'}}}, '2022-07-30T00:00:00.000000Z'], + ['test_min_child_bookmark_single_child', ['tickets', 'conversations'], + {'bookmarks': {'tickets': {'updated_at': '2022-08-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-07-30T00:00:00.000000Z'}}}, '2022-07-30T00:00:00.000000Z'], + ['test_min_child_bookmark_multiple_child', ['tickets', 'conversations', 'time_entries'], + {'bookmarks': {'tickets': {'updated_at': '2022-09-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-09-30T00:00:00.000000Z'}}}, START_DATE], + ['test_multiple_child_only_bookmark', ['tickets', 'conversations', 'time_entries'], + {'bookmarks': {'time_entries': {'updated_at': '2022-09-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-09-30T00:00:00.000000Z'}}}, START_DATE], + ['test_multiple_child_bookmark', ['tickets', 'conversations', 'time_entries'], + {'bookmarks': {'time_entries': {'updated_at': '2022-06-30T00:00:00.000000Z'}, 'tickets': {'updated_at': '2022-08-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-11-30T00:00:00.000000Z'}}}, '2022-06-30T00:00:00.000000Z'] + + ]) + def test_get_min_bookmark(self, name, selected_streams, state, expected_bookmark): + """ + Test that `get_min_bookmark` function return minimum bookmark value among the parent and 
child streams. + """ + current_time = '2022-09-30T00:00:00.000000Z' + actual_bookmark = get_min_bookmark('tickets', selected_streams, current_time, START_DATE, state, 'updated_at') + self.assertEqual(actual_bookmark, expected_bookmark) \ No newline at end of file From b44017c458b21c0aec85c16fa7cb0675cacf5534 Mon Sep 17 00:00:00 2001 From: prijendev Date: Mon, 12 Sep 2022 14:42:32 +0530 Subject: [PATCH 28/32] Revert "Resolved pylint error." This reverts commit 042885ae5f6cf4152503c73fb23d4517c45b57d3. --- tap_freshdesk/client.py | 237 +++++----------------------------------- 1 file changed, 26 insertions(+), 211 deletions(-) diff --git a/tap_freshdesk/client.py b/tap_freshdesk/client.py index f111a36..e914564 100644 --- a/tap_freshdesk/client.py +++ b/tap_freshdesk/client.py @@ -1,248 +1,63 @@ - +import time import backoff import requests -import pendulum -from singer import utils import singer -import time - -LOGGER = singer.get_logger() - -BASE_ID_URL = "https://id.getharvest.com/api/v2/" -BASE_API_URL = "https://api.harvestapp.com/v2/" -# timeout request after 300 seconds -REQUEST_TIMEOUT = 300 - -class HarvestError(Exception): - pass - -class Server5xxError(Exception): - pass - -class HarvestBadRequestError(HarvestError): - pass - -class HarvestUnauthorizedError(HarvestError): - pass - -class HarvestNotFoundError(HarvestError): - pass - -class HarvestForbiddenError(HarvestError): - pass - -class HarvestUnprocessableEntityError(HarvestError): - pass - -class HarvestRateLimitExceeededError(HarvestError): - pass - -class HarvestInternalServiceError(Server5xxError): - pass - -ERROR_CODE_EXCEPTION_MAPPING = { - 400: { - "raise_exception": HarvestBadRequestError, - "message": "The request is missing or has a bad parameter." - }, - 401: { - "raise_exception": HarvestUnauthorizedError, - "message": "Invalid authorization credentials." - }, - 403: { - "raise_exception": HarvestForbiddenError, - "message": "User does not have permission to access the resource or "\ - "related feature is disabled." - }, - 404: { - "raise_exception": HarvestNotFoundError, - "message": "The resource you have specified cannot be found." - }, - 422: { - "raise_exception": HarvestUnprocessableEntityError, - "message": "The request was not able to process right now." - }, - 429: { - "raise_exception": HarvestRateLimitExceeededError, - "message": "API rate limit exceeded." - }, - 500: { - "raise_exception": HarvestInternalServiceError, - "message": "An error has occurred at Harvest's end." - } -} - -def raise_for_error(response): - """ - Forming a custom response message for raising an exception. - """ +from singer import utils - error_code = response.status_code - try: - response_json = response.json() - except Exception: - response_json = {} - if error_code not in ERROR_CODE_EXCEPTION_MAPPING and error_code > 500: - # Raise `Server5xxError` for all 5xx unknown error - exc = Server5xxError - else: - exc = ERROR_CODE_EXCEPTION_MAPPING.get(error_code, {}).get("raise_exception", HarvestError) - error_message = response_json.get("error_description", ERROR_CODE_EXCEPTION_MAPPING.get( - error_code, {}).get("message", "An Unknown Error occurred.")) - message = "HTTP-error-code: {}, Error: {}".format(error_code, error_message) +LOGGER = singer.get_logger() +BASE_URL = "https://{}.freshdesk.com" - raise exc(message) from None -class HarvestClient: #pylint: disable=too-many-instance-attributes +class FreshdeskClient: """ - The client class is used for making REST calls to the Harvest API. 
+ The client class is used for making REST calls to the Freshdesk API. """ def __init__(self, config): self.config = config - self._client_id = config['client_id'] - self._client_secret = config['client_secret'] - self._refresh_token = config['refresh_token'] - self._user_agent = config['user_agent'] - self._account_id = None self.session = requests.Session() - self._access_token = None - self._expires_at = None - self.request_timeout = self.get_request_timeout() + self.base_url = BASE_URL.format(config.get("domain")) def __enter__(self): - self._refresh_access_token() + self.check_access_token() + return self def __exit__(self, exception_type, exception_value, traceback): + # Kill the session instance. self.session.close() - def get_request_timeout(self): - """ - Get timeout value from config, if the value is passed. - Else return the default value. - """ - # Get `request_timeout` value from config. - config_request_timeout = self.config.get('request_timeout') - - # If timeout is not passed in the config then set it to the default(300 seconds) - if config_request_timeout is None: - return REQUEST_TIMEOUT - - # If config request_timeout is other than 0,"0" or invalid string then use request_timeout - if ((type(config_request_timeout) in [int, float]) or - (isinstance(config_request_timeout,str) and config_request_timeout.replace('.', '', 1).isdigit())) and float(config_request_timeout): - return float(config_request_timeout) - raise Exception("The entered timeout is invalid, it should be a valid none-zero integer.") - - @backoff.on_exception(backoff.expo, - (HarvestRateLimitExceeededError, Server5xxError, - requests.Timeout, requests.ConnectionError), - max_tries=5, - factor=2) - def _refresh_access_token(self): - """ - Create an access token using the refresh token. - """ - LOGGER.info("Refreshing access token") - resp = self.session.request('POST', - url=BASE_ID_URL + 'oauth2/token', - data={ - 'client_id': self._client_id, - 'client_secret': self._client_secret, - 'refresh_token': self._refresh_token, - 'grant_type': 'refresh_token', - }, - headers={"User-Agent": self._user_agent}) - - expires_in_seconds = resp.json().get('expires_in', 17 * 60 * 60) - self._expires_at = pendulum.now().add(seconds=expires_in_seconds) - resp_json = {} - try: - resp_json = resp.json() - self._access_token = resp_json['access_token'] - # If an access token is not provided in response, raise an error - except KeyError: - if resp_json.get('error'): - LOGGER.critical(resp_json.get('error')) - if resp_json.get('error_description'): - LOGGER.critical(resp_json.get('error_description')) - raise_for_error(resp) - LOGGER.info("Got refreshed access token") - - def get_access_token(self): + def check_access_token(self): """ - Return access token if available or generate one. + Check if the access token is valid. """ - if self._access_token is not None and self._expires_at > pendulum.now(): - return self._access_token - - self._refresh_access_token() - return self._access_token + self.request(self.base_url+"/api/v2/roles", {"per_page": 1, "page": 1}) @backoff.on_exception(backoff.expo, - (HarvestRateLimitExceeededError, Server5xxError, - requests.Timeout, requests.ConnectionError), + (requests.exceptions.RequestException), max_tries=5, + giveup=lambda e: e.response is not None and 400 <= e.response.status_code < 500, factor=2) - def get_account_id(self): - """ - Get the account Id of the Active Harvest account. - It will throw an exception if no active harvest account is found. 
- """ - if self._account_id is not None: - return self._account_id - - response = self.session.request('GET', - url=BASE_ID_URL + 'accounts', - headers={'Authorization': 'Bearer ' + self._access_token, - 'User-Agent': self._user_agent}, - timeout=self.request_timeout) - - # Call the function again if the rate limit is exceeded - if 'Retry-After' in response.headers: - retry_after = int(response.headers['Retry-After']) - LOGGER.info("Rate limit reached. Sleeping for %s seconds", retry_after) - time.sleep(retry_after) - return self.get_account_id() - - if response.status_code != 200: - raise_for_error(response) - - if response.json().get('accounts'): - self._account_id = str(response.json()['accounts'][0]['id']) - return self._account_id - - raise Exception("No Active Harvest Account found") from None - - @backoff.on_exception(backoff.expo, - (HarvestRateLimitExceeededError, Server5xxError, - requests.Timeout, requests.ConnectionError), - max_tries=5, - factor=2) - @utils.ratelimit(100, 15) + @utils.ratelimit(1, 2) def request(self, url, params=None): """ Call rest API and return the response in case of status code 200. """ - params = params or {} - access_token = self.get_access_token() - headers = {"Accept": "application/json", - "Harvest-Account-Id": self.get_account_id(), - "Authorization": "Bearer " + access_token, - "User-Agent": self._user_agent} - req = requests.Request("GET", url=url, params=params, headers=headers).prepare() + headers = {} + if 'user_agent' in self.config: + headers['User-Agent'] = self.config['user_agent'] + + req = requests.Request('GET', url, params=params, auth=(self.config['api_key'], ""), headers=headers).prepare() LOGGER.info("GET %s", req.url) - resp = self.session.send(req, timeout=self.request_timeout) + response = self.session.send(req) # Call the function again if the rate limit is exceeded - if 'Retry-After' in resp.headers: - retry_after = int(resp.headers['Retry-After']) + if 'Retry-After' in response.headers: + retry_after = int(response.headers['Retry-After']) LOGGER.info("Rate limit reached. Sleeping for %s seconds", retry_after) time.sleep(retry_after) return self.request(url, params) - if resp.status_code != 200: - raise_for_error(resp) + response.raise_for_status() - return resp.json() + return response.json() From 5a8a71abf25c9bbfd39676da310b5ab1ed088101 Mon Sep 17 00:00:00 2001 From: prijendev Date: Mon, 12 Sep 2022 14:48:54 +0530 Subject: [PATCH 29/32] Updated unit test for get_min_bookmark function. 
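
For reviewers, a minimal sketch of the new get_min_bookmark() call shape the
updated tests exercise. The import, state and timestamp values below are
illustrative only (they mirror the test fixtures in this patch) and are not
part of the code change:

    from tap_freshdesk.streams import get_min_bookmark

    state = {'bookmarks': {'tickets': {'updated_at': '2022-08-30T00:00:00.000000Z'}}}
    # The current sync time is now passed in and acts as the upper bound
    # when neither the stream nor its children have a bookmark yet.
    current_time = '2022-09-30T00:00:00.000000Z'
    start_date = '2022-09-01T00:00:00.000000Z'

    # Minimum bookmark among 'tickets' and its selected child streams.
    min_bm = get_min_bookmark('tickets', ['tickets'], current_time,
                              start_date, state, 'updated_at')
    # min_bm == '2022-08-30T00:00:00.000000Z'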
--- tests/unittests/test_bookmark_handling.py | 28 ++++++++++++++------ tests/unittests/test_streams.py | 32 ----------------------- 2 files changed, 20 insertions(+), 40 deletions(-) diff --git a/tests/unittests/test_bookmark_handling.py b/tests/unittests/test_bookmark_handling.py index a1bca25..ffdba5c 100644 --- a/tests/unittests/test_bookmark_handling.py +++ b/tests/unittests/test_bookmark_handling.py @@ -2,6 +2,7 @@ from parameterized import parameterized from tap_freshdesk.streams import get_min_bookmark, get_schema, write_bookmark +START_DATE = '2022-09-00T00:00:00.000000Z' class TestGetMinBookmark(unittest.TestCase): """ @@ -19,18 +20,29 @@ class TestGetMinBookmark(unittest.TestCase): } @parameterized.expand([ - ["with_child_selected", "tickets", ["tickets", "satisfaction_ratings"], "updated_at", "2022-03-14T00:00:00Z"], - ["only_children_selected", "tickets", ["satisfaction_ratings","conversations", "time_entries"], "updated_at", "2022-03-01T00:00:00Z"], - ["only_parent_selected", "tickets", ["tickets"], "updated_at", "2022-03-29T00:00:00Z"], + ['test_parent_only_with_state', ['tickets'], {'bookmarks': {'tickets': {'updated_at': '2022-08-30T00:00:00.000000Z'}}}, '2022-08-30T00:00:00.000000Z'], + ['test_child_only_with_state', ['conversations'], {'bookmarks': {'conversations': {'updated_at': '2022-08-30T00:00:00.000000Z'}}}, '2022-08-30T00:00:00.000000Z'], + ['test_parent_only_without_state', ['tickets'], {}, START_DATE], + ['test_child_only_without_state', ['tickets'], {}, START_DATE], + ['test_min_parent_bookmark_single_child', ['tickets', 'conversations'], + {'bookmarks': {'tickets': {'updated_at': '2022-07-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-08-30T00:00:00.000000Z'}}}, '2022-07-30T00:00:00.000000Z'], + ['test_min_child_bookmark_single_child', ['tickets', 'conversations'], + {'bookmarks': {'tickets': {'updated_at': '2022-08-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-07-30T00:00:00.000000Z'}}}, '2022-07-30T00:00:00.000000Z'], + ['test_min_child_bookmark_multiple_child', ['tickets', 'conversations', 'time_entries'], + {'bookmarks': {'tickets': {'updated_at': '2022-09-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-09-30T00:00:00.000000Z'}}}, START_DATE], + ['test_multiple_child_only_bookmark', ['tickets', 'conversations', 'time_entries'], + {'bookmarks': {'time_entries': {'updated_at': '2022-09-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-09-30T00:00:00.000000Z'}}}, START_DATE], + ['test_multiple_child_bookmark', ['tickets', 'conversations', 'time_entries'], + {'bookmarks': {'time_entries': {'updated_at': '2022-06-30T00:00:00.000000Z'}, 'tickets': {'updated_at': '2022-08-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-11-30T00:00:00.000000Z'}}}, '2022-06-30T00:00:00.000000Z'] + ]) - def test_min_bookmark(self, name, stream_name, stream_to_sync, bookmark_key, expected_bookmark): + def test_min_bookmark(self, name, selected_streams, state, expected_bookmark): """ Test that `get_min_bookmark` method returns the minimum bookmark from the parent and its corresponding child bookmarks. 
""" - min_bookmark = get_min_bookmark(stream_name, stream_to_sync, self.start_date, self.state, bookmark_key) - - # Verify returned bookmark is expected - self.assertEqual(min_bookmark, expected_bookmark) + current_time = '2022-09-30T00:00:00.000000Z' + actual_bookmark = get_min_bookmark('tickets', selected_streams, current_time, START_DATE, state, 'updated_at') + self.assertEqual(actual_bookmark, expected_bookmark) class TestGetSchema(unittest.TestCase): diff --git a/tests/unittests/test_streams.py b/tests/unittests/test_streams.py index 5c81d34..11a5b41 100644 --- a/tests/unittests/test_streams.py +++ b/tests/unittests/test_streams.py @@ -3,8 +3,6 @@ from parameterized import parameterized from tap_freshdesk.streams import Agents, Tickets, get_min_bookmark -START_DATE = '2022-09-00T00:00:00.000000Z' - class TestSyncObj(unittest.TestCase): """ Test `sync_obj` mehtod of stream. @@ -146,33 +144,3 @@ def test_transform(self, name, dictionary, expected_list, kwargs): # Verify returned list is expected self.assertEqual(returned_list, expected_list) - -class TestStreamsUtils(unittest.TestCase): - """ - Test utility functions of streams module. - """ - - @parameterized.expand([ - ['test_parent_only_with_state', ['tickets'], {'bookmarks': {'tickets': {'updated_at': '2022-08-30T00:00:00.000000Z'}}}, '2022-08-30T00:00:00.000000Z'], - ['test_child_only_with_state', ['conversations'], {'bookmarks': {'conversations': {'updated_at': '2022-08-30T00:00:00.000000Z'}}}, '2022-08-30T00:00:00.000000Z'], - ['test_parent_only_without_state', ['tickets'], {}, START_DATE], - ['test_child_only_without_state', ['tickets'], {}, START_DATE], - ['test_min_parent_bookmark_single_child', ['tickets', 'conversations'], - {'bookmarks': {'tickets': {'updated_at': '2022-07-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-08-30T00:00:00.000000Z'}}}, '2022-07-30T00:00:00.000000Z'], - ['test_min_child_bookmark_single_child', ['tickets', 'conversations'], - {'bookmarks': {'tickets': {'updated_at': '2022-08-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-07-30T00:00:00.000000Z'}}}, '2022-07-30T00:00:00.000000Z'], - ['test_min_child_bookmark_multiple_child', ['tickets', 'conversations', 'time_entries'], - {'bookmarks': {'tickets': {'updated_at': '2022-09-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-09-30T00:00:00.000000Z'}}}, START_DATE], - ['test_multiple_child_only_bookmark', ['tickets', 'conversations', 'time_entries'], - {'bookmarks': {'time_entries': {'updated_at': '2022-09-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-09-30T00:00:00.000000Z'}}}, START_DATE], - ['test_multiple_child_bookmark', ['tickets', 'conversations', 'time_entries'], - {'bookmarks': {'time_entries': {'updated_at': '2022-06-30T00:00:00.000000Z'}, 'tickets': {'updated_at': '2022-08-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-11-30T00:00:00.000000Z'}}}, '2022-06-30T00:00:00.000000Z'] - - ]) - def test_get_min_bookmark(self, name, selected_streams, state, expected_bookmark): - """ - Test that `get_min_bookmark` function return minimum bookmark value among the parent and child streams. 
- """ - current_time = '2022-09-30T00:00:00.000000Z' - actual_bookmark = get_min_bookmark('tickets', selected_streams, current_time, START_DATE, state, 'updated_at') - self.assertEqual(actual_bookmark, expected_bookmark) \ No newline at end of file From a9daace7dfebd1ed52cd65bb30aca69a80fe7d13 Mon Sep 17 00:00:00 2001 From: prijendev Date: Thu, 15 Sep 2022 14:36:18 +0530 Subject: [PATCH 30/32] Added configurable pagination parameter. --- tap_freshdesk/client.py | 20 ++++++++++ tap_freshdesk/streams.py | 16 ++++---- tests/base.py | 4 +- tests/test_freshdesk_pagination.py | 24 +++++++----- tests/unittests/test_page_size.py | 62 ++++++++++++++++++++++++++++++ 5 files changed, 107 insertions(+), 19 deletions(-) create mode 100644 tests/unittests/test_page_size.py diff --git a/tap_freshdesk/client.py b/tap_freshdesk/client.py index e914564..d263208 100644 --- a/tap_freshdesk/client.py +++ b/tap_freshdesk/client.py @@ -7,6 +7,7 @@ LOGGER = singer.get_logger() BASE_URL = "https://{}.freshdesk.com" +DEFAULT_PAGE_SIZE = 100 class FreshdeskClient: @@ -18,6 +19,7 @@ def __init__(self, config): self.config = config self.session = requests.Session() self.base_url = BASE_URL.format(config.get("domain")) + self.page_size = self.get_page_size() def __enter__(self): self.check_access_token() @@ -27,6 +29,24 @@ def __exit__(self, exception_type, exception_value, traceback): # Kill the session instance. self.session.close() + def get_page_size(self): + """ + This function will get page size from config, + and will return the default value if an invalid page size is given. + """ + page_size = self.config.get('page_size') + + # return a default value if no page size is given in the config + if page_size is None: + return DEFAULT_PAGE_SIZE + + # Return integer value if the valid value is given + if (type(page_size) in [int, float] and page_size > 0) or \ + (isinstance(page_size, str) and page_size.replace('.', '', 1).isdigit() and (float(page_size) > 0)): + return int(float(page_size)) + # Raise an exception for 0, "0" or invalid value of page_size + raise Exception("The entered page size is invalid, it should be a valid integer.") + def check_access_token(self): """ Check if the access token is valid. diff --git a/tap_freshdesk/streams.py b/tap_freshdesk/streams.py index 32b10a7..d6f0721 100644 --- a/tap_freshdesk/streams.py +++ b/tap_freshdesk/streams.py @@ -5,7 +5,7 @@ LOGGER = singer.get_logger() -PAGE_SIZE = 100 +DEFAULT_PAGE_SIZE = 100 DATETIME_FMT = "%Y-%m-%dT%H:%M:%SZ" @@ -59,7 +59,7 @@ class Stream: children = [] path = '' headers = {} - params = {"per_page": PAGE_SIZE, "page": 1} + params = {"per_page": DEFAULT_PAGE_SIZE, "page": 1} paginate = True parent = None id_key = None @@ -92,13 +92,11 @@ def write_records(self, catalog, state, selected_streams, start_date, data, max_ """ Transform the chunk of records according to the schema and write the records based on the bookmark. """ - params = copy.deepcopy(self.params) stream_catalog = get_schema(catalog, self.tap_stream_id) stream_id = self.tap_stream_id # Append the predefined filter in case it's present if predefined_filter: - params[self.filter_keyword] = predefined_filter stream_id = stream_id + '_' + predefined_filter bookmark = get_bookmark(state, stream_id, self.replication_keys[0], start_date) # The max bookmark so far for the child stream @@ -135,7 +133,7 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams """ The base stream class sync_obj() function to fetch records. 
""" - params = copy.deepcopy(self.params) + params = {**self.params, "per_page": client.page_size} full_url = self.build_url(client.base_url, self.parent_id) # Update the filter keyword in the params for date-filtered streams @@ -160,7 +158,7 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams # Paginate through the request while self.paginate: data = client.request(full_url, params) - self.paginate = len(data) >= PAGE_SIZE + self.paginate = len(data) >= client.page_size params['page'] += 1 max_bookmark, child_max_bookmarks = self.write_records(catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, child_max_bookmarks, predefined_filter) write_bookmark(self.tap_stream_id, selected_streams, max_bookmark, state, predefined_filter) @@ -232,7 +230,7 @@ class Tickets(DateFilteredStream): id_key = 'id' date_filter = 'updated_since' params = { - "per_page": PAGE_SIZE, + "per_page": DEFAULT_PAGE_SIZE, 'order_by': "updated_at", 'order_type': "asc", 'include': "requester,company,stats" @@ -258,7 +256,7 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams """ The child stream sync_obj() method to sync the child records """ - params = copy.deepcopy(self.params) + params = {**self.params, "per_page": client.page_size} # Build the url for the request full_url = self.build_url(client.base_url, self.parent_id) @@ -273,7 +271,7 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams # Paginate through the records while self.paginate: data = client.request(full_url, params) - self.paginate = len(data) >= PAGE_SIZE + self.paginate = len(data) >= client.page_size params['page'] += 1 # Write the records based on the bookmark and return the max_bookmark for the page bookmark, _ = self.write_records(catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, None) diff --git a/tests/base.py b/tests/base.py index c7307b1..581dc63 100644 --- a/tests/base.py +++ b/tests/base.py @@ -18,6 +18,7 @@ class FreshdeskBaseTest(unittest.TestCase): FULL = "FULL_TABLE" start_date = "" + PAGE_SIZE = 100 START_DATE_FORMAT = "%Y-%m-%dT00:00:00Z" # %H:%M:%SZ BOOKMARK_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" RECORD_REPLICATION_KEY_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" @@ -49,7 +50,8 @@ def get_properties(self, original: bool = True): :param original: set to false to change the start_date or end_date """ return_value = { - 'start_date' : '2019-01-04T00:00:00Z' + 'start_date' : '2019-01-04T00:00:00Z', + 'page_size': self.PAGE_SIZE } if original: return return_value diff --git a/tests/test_freshdesk_pagination.py b/tests/test_freshdesk_pagination.py index 12ad143..542fea8 100644 --- a/tests/test_freshdesk_pagination.py +++ b/tests/test_freshdesk_pagination.py @@ -13,15 +13,26 @@ def test_name(self): print("Pagination Test for tap-freshdesk") def test_run(self): + """ + Test streams with different page_size as per available records. + """ + # Not able to generate more data as roles stream requires pro account. + # So, updating page_sie according to data available. + test_streams = {'roles'} + self.run_pagination(test_streams, page_size = 2) - # Page size for pagination supported streams - page_size = 100 + # Setting page_size back to 100 for the rest of the streams. 
+ self.run_pagination(self.expected_streams() - test_streams, page_size = 100) + + def run_pagination(self, expected_streams, page_size): + # Page size for pagination-supported streams + self.PAGE_SIZE = page_size # Instantiate connection conn_id = connections.ensure_connection(self) # To collect "time_entries", "satisfaction_ratings" pro account is needed. Skipping them for now. - expected_streams = self.expected_streams() - {"time_entries", "satisfaction_ratings"} + expected_streams = expected_streams - {"time_entries", "satisfaction_ratings"} found_catalogs = self.run_and_verify_check_mode(conn_id) @@ -41,12 +52,7 @@ def test_run(self): # Test by stream for stream in expected_streams: with self.subTest(stream=stream): - # Not able to generate more data as roles stream requires pro account. - # So, updating page_sie according to data available. - if stream == "roles": - page_size = 2 - else: - page_size = 100 + # Expected values expected_primary_keys = self.expected_primary_keys()[stream] diff --git a/tests/unittests/test_page_size.py b/tests/unittests/test_page_size.py new file mode 100644 index 0000000..76b0572 --- /dev/null +++ b/tests/unittests/test_page_size.py @@ -0,0 +1,62 @@ +import unittest +from parameterized import parameterized +import tap_freshdesk.client as client_ + +PAGE_SIZE_INT = 50 +PAGE_SIZE_STR_INT = "50" +PAGE_SIZE_STR_FLOAT = "50.0" +PAGE_SIZE_FLOAT = 50.0 +PAGE_SIZE_ZERO = 0 +PAGE_SIZE_STR_ZERO = "0" +PAGE_SIZE_INVALID_STRING = "abc" + + +class TestPageSizeValue(unittest.TestCase): + + @parameterized.expand([ + [PAGE_SIZE_INT, PAGE_SIZE_INT], + [PAGE_SIZE_STR_INT, PAGE_SIZE_INT], + [PAGE_SIZE_STR_FLOAT, PAGE_SIZE_INT], + [PAGE_SIZE_FLOAT, PAGE_SIZE_INT], + ]) + def test_page_size_for_valid_values(self, page_size_value, expected_value): + """ + Test the various values of page_size: + - For string, integer, float type of values, converts to float + - For null string, zero(string), zero(integer), takes default integer value + """ + config = {'domain': 'abc', "page_size": page_size_value} + client = client_.FreshdeskClient(config) + + # Verify the page_size is the same as the expected value + self.assertEqual(client.page_size, expected_value) + + @parameterized.expand([ + [PAGE_SIZE_INVALID_STRING], + [PAGE_SIZE_STR_ZERO], + [PAGE_SIZE_ZERO], + ]) + def test_page_size_for_invalid_values(self, page_size_value): + """ + Test the various values of page_size: + - For string, integer, float type of values, converts to float + - For null string, zero(string), zero(integer), takes default integer value + """ + + config = {'domain': 'abc', "page_size": page_size_value} + # Verify the tap raises Exception + with self.assertRaises(Exception) as e: + client_.FreshdeskClient(config) + + # Verify the tap raises an error with expected error message + self.assertEqual(str(e.exception), "The entered page size is invalid, it should be a valid integer.") + + def test_without_page_size(self): + """ + Test if no page size is given in config, default page_size will be considered. + """ + config = {'domain': 'abc'} + client = client_.FreshdeskClient(config) + + # Verify the page_size is the same as the default value + self.assertEqual(client.page_size, client_.DEFAULT_PAGE_SIZE) From 36ffa951649e0fa9ea91d42326913a120359c320 Mon Sep 17 00:00:00 2001 From: prijendev Date: Thu, 15 Sep 2022 17:07:47 +0530 Subject: [PATCH 31/32] Updated test_streams unit test case. 
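
Note for reviewers: the streams now read the page size from the client object
(added in the previous patch), so the mocked client in these tests must expose
a page_size attribute. A minimal illustrative sketch of the pattern, not part
of the code change:

    from unittest import mock
    from tap_freshdesk.streams import Tickets

    client = mock.Mock()
    client.base_url = ""
    # Without this, the pagination check `len(data) >= client.page_size`
    # would compare an int against a Mock attribute.
    client.page_size = 100
    # One empty page per ticket filter (default, deleted, spam) ends pagination.
    client.request.side_effect = [[], [], []]

    catalog = [{"schema": {}, "tap_stream_id": "tickets", "metadata": []}]
    Tickets().sync_obj({}, "2019-06-01T00:00:00Z", client, catalog,
                       ["tickets"], ["tickets"])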
--- tests/unittests/test_streams.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/unittests/test_streams.py b/tests/unittests/test_streams.py index 11a5b41..0151e87 100644 --- a/tests/unittests/test_streams.py +++ b/tests/unittests/test_streams.py @@ -63,6 +63,7 @@ def test_stream(self, name, selected_streams, streams_to_sync, responses, writte state = {} client = mock.Mock() client.base_url = "" + client.page_size = 100 client.request.side_effect = responses catalog = [ {"schema":{}, "tap_stream_id": "tickets", "metadata": []}, @@ -97,6 +98,7 @@ def test_parent_child_both_selected(self, name, state, expected_state, written_r stream = Tickets() client = mock.Mock() client.base_url = "" + client.page_size = 100 client.request.side_effect = [ [{"id": i, "updated_at": f"2020-03-{i}T00:00:00Z"} for i in [11,15,12]], # Tickets Response [{"id": 10+i, "updated_at": f"2020-03-{i}T00:00:00Z"} for i in [13,24]], # conversations Response From ef05eaf80e7f0d6f37dd1eaa14f19dd8f23d2156 Mon Sep 17 00:00:00 2001 From: prijendev Date: Wed, 21 Sep 2022 10:40:17 +0530 Subject: [PATCH 32/32] Resolved autopep8 errors. --- .circleci/config.yml | 2 +- tap_freshdesk/__init__.py | 4 + tap_freshdesk/client.py | 2 +- tap_freshdesk/discover.py | 3 +- tap_freshdesk/schema.py | 14 +-- tap_freshdesk/streams.py | 55 ++++++++--- tap_freshdesk/sync.py | 12 ++- tests/unittests/test_bookmark_handling.py | 23 +++-- tests/unittests/test_check_access_token.py | 2 +- tests/unittests/test_client.py | 3 +- tests/unittests/test_currently_syncing.py | 21 ++-- tests/unittests/test_main.py | 21 ++-- tests/unittests/test_page_size.py | 2 + tests/unittests/test_streams.py | 107 ++++++++++++--------- tests/unittests/test_sync.py | 66 ++++++------- 15 files changed, 205 insertions(+), 132 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 24b4069..c889104 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -24,7 +24,7 @@ jobs: name: 'pylint' command: | source /usr/local/share/virtualenvs/tap-freshdesk/bin/activate - pylint tap_freshdesk --disable 'missing-module-docstring,missing-function-docstring,missing-class-docstring,line-too-long,invalid-name,too-many-lines,consider-using-f-string,too-many-arguments,too-many-locals' + pylint tap_freshdesk --disable 'missing-module-docstring,line-too-long,invalid-name,too-many-lines,consider-using-f-string,too-many-arguments,too-many-locals' - run: name: 'Unit Tests' command: | diff --git a/tap_freshdesk/__init__.py b/tap_freshdesk/__init__.py index 2413526..1ca6c9d 100644 --- a/tap_freshdesk/__init__.py +++ b/tap_freshdesk/__init__.py @@ -12,6 +12,9 @@ @utils.handle_top_exception(LOGGER) def main(): + """ + Run discover mode or sync mode. 
+ """ args = utils.parse_args(REQUIRED_CONFIG_KEYS) config = args.config with FreshdeskClient(config) as client: @@ -23,5 +26,6 @@ def main(): if args.catalog else _discover() _sync(client, config, args.state, catalog.to_dict()) + if __name__ == "__main__": main() diff --git a/tap_freshdesk/client.py b/tap_freshdesk/client.py index d263208..9ac440c 100644 --- a/tap_freshdesk/client.py +++ b/tap_freshdesk/client.py @@ -42,7 +42,7 @@ def get_page_size(self): # Return integer value if the valid value is given if (type(page_size) in [int, float] and page_size > 0) or \ - (isinstance(page_size, str) and page_size.replace('.', '', 1).isdigit() and (float(page_size) > 0)): + (isinstance(page_size, str) and page_size.replace('.', '', 1).isdigit() and (float(page_size) > 0)): return int(float(page_size)) # Raise an exception for 0, "0" or invalid value of page_size raise Exception("The entered page size is invalid, it should be a valid integer.") diff --git a/tap_freshdesk/discover.py b/tap_freshdesk/discover.py index caebacb..c7fc073 100644 --- a/tap_freshdesk/discover.py +++ b/tap_freshdesk/discover.py @@ -4,6 +4,7 @@ LOGGER = singer.get_logger() + def discover(): """ Run the discovery mode, prepare the catalog file and return the catalog. @@ -25,7 +26,7 @@ def discover(): catalog.streams.append(CatalogEntry( stream=stream_name, tap_stream_id=stream_name, - key_properties= key_properties, + key_properties=key_properties, schema=schema, metadata=mdata )) diff --git a/tap_freshdesk/schema.py b/tap_freshdesk/schema.py index 8d33c3d..d980ab5 100644 --- a/tap_freshdesk/schema.py +++ b/tap_freshdesk/schema.py @@ -4,12 +4,14 @@ import singer from tap_freshdesk.streams import STREAMS + def get_abs_path(path): """ Get the absolute path for the schema files. """ return os.path.join(os.path.dirname(os.path.realpath(__file__)), path) + def get_schemas(): """ Load the schema references, prepare metadata for each stream and return schema and metadata for the catalog. 
@@ -21,7 +23,7 @@ def get_schemas(): for stream_name, stream_metadata in STREAMS.items(): schema_path = get_abs_path('schemas/{}.json'.format(stream_name)) - with open(schema_path) as file: # pylint: disable=unspecified-encoding + with open(schema_path) as file: # pylint: disable=unspecified-encoding schema = json.load(file) schemas[stream_name] = schema @@ -30,11 +32,11 @@ def get_schemas(): replication_keys = (hasattr(stream_metadata, 'replication_keys') or None) and stream_metadata.replication_keys mdata = metadata.new() mdata = metadata.get_standard_metadata( - schema=schema, - key_properties = (hasattr(stream_metadata, 'key_properties') or None) and stream_metadata.key_properties, - valid_replication_keys = replication_keys, - replication_method = (hasattr(stream_metadata, 'replication_method') or None) and stream_metadata.replication_method - ) + schema=schema, + key_properties=(hasattr(stream_metadata, 'key_properties') or None) and stream_metadata.key_properties, + valid_replication_keys=replication_keys, + replication_method=(hasattr(stream_metadata, 'replication_method') or None) + and stream_metadata.replication_method) mdata = metadata.to_map(mdata) # Loop through all keys and make replication keys of automatic inclusion diff --git a/tap_freshdesk/streams.py b/tap_freshdesk/streams.py index d6f0721..edfd1cc 100644 --- a/tap_freshdesk/streams.py +++ b/tap_freshdesk/streams.py @@ -24,17 +24,20 @@ def get_min_bookmark(stream, selected_streams, bookmark, start_date, state, book # Iterate through all children and return minimum bookmark among all. for child in stream_obj.children: - min_bookmark = min(min_bookmark, get_min_bookmark(child, selected_streams, bookmark, start_date, state, bookmark_key)) + min_bookmark = min(min_bookmark, get_min_bookmark( + child, selected_streams, bookmark, start_date, state, bookmark_key)) return min_bookmark + def get_schema(catalog, stream_id): """ Return the catalog of the specified stream. """ - stream_catalog = [cat for cat in catalog if cat['tap_stream_id'] == stream_id ][0] + stream_catalog = [cat for cat in catalog if cat['tap_stream_id'] == stream_id][0] return stream_catalog + def write_bookmark(stream, selected_streams, bookmark_value, state, predefined_filter=None): """ Write the bookmark in case the stream is selected. @@ -46,6 +49,7 @@ def write_bookmark(stream, selected_streams, bookmark_value, state, predefined_f stream_id = stream_id + '_' + predefined_filter singer.write_bookmark(state, stream_id, stream_obj.replication_keys[0], bookmark_value) + class Stream: """ Base class representing tap-freshdesk streams. @@ -86,9 +90,10 @@ def build_url(self, base_url, *args): """ Build the full url with parameters and attributes. """ - return base_url + '/api/v2/'+ self.path.format(*args) + return base_url + '/api/v2/' + self.path.format(*args) - def write_records(self, catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, child_max_bookmarks, predefined_filter=None): + def write_records(self, catalog, state, selected_streams, start_date, data, max_bookmark, + client, streams_to_sync, child_max_bookmarks, predefined_filter=None): """ Transform the chunk of records according to the schema and write the records based on the bookmark. 
""" @@ -123,9 +128,11 @@ def write_records(self, catalog, state, selected_streams, start_date, data, max_ child_obj = STREAMS[child]() if child in selected_streams: child_obj.parent_id = row['id'] - child_max_bookmark = get_bookmark(state, child_obj.tap_stream_id, child_obj.replication_keys[0], start_date) + child_max_bookmark = get_bookmark(state, child_obj.tap_stream_id, + child_obj.replication_keys[0], start_date) # Update the child's max_bookmark as the max of the already present max value and the return value - child_max_bookmark = max(child_max_bookmarks.get(child, child_max_bookmark), child_obj.sync_obj(state, start_date, client, catalog, selected_streams, streams_to_sync)) + child_max_bookmark = max(child_max_bookmarks.get(child, child_max_bookmark), child_obj.sync_obj( + state, start_date, client, catalog, selected_streams, streams_to_sync)) child_max_bookmarks[child] = child_max_bookmark return max_bookmark, child_max_bookmarks @@ -143,7 +150,8 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams current_time = dt.strftime(dt.now(), DATETIME_FMT) # Get the minimum bookmark from the parent and the child streams - min_bookmark = get_min_bookmark(self.tap_stream_id, selected_streams, current_time, start_date, state, self.replication_keys[0], predefined_filter) + min_bookmark = get_min_bookmark(self.tap_stream_id, selected_streams, current_time, + start_date, state, self.replication_keys[0], predefined_filter) max_bookmark = min_bookmark # Initialize the child_max_bookmarks dictionary child_max_bookmarks = {} @@ -160,7 +168,9 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams data = client.request(full_url, params) self.paginate = len(data) >= client.page_size params['page'] += 1 - max_bookmark, child_max_bookmarks = self.write_records(catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, child_max_bookmarks, predefined_filter) + max_bookmark, child_max_bookmarks = self.write_records( + catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, + child_max_bookmarks, predefined_filter) write_bookmark(self.tap_stream_id, selected_streams, max_bookmark, state, predefined_filter) # Write the max_bookmark for the child streams in the state files if they are selected. @@ -176,6 +186,7 @@ class Agents(Stream): tap_stream_id = 'agents' path = 'agents' + class Companies(Stream): """ https://developer.freshdesk.com/api/#list_all_companies @@ -183,6 +194,7 @@ class Companies(Stream): tap_stream_id = 'companies' path = 'companies' + class Groups(Stream): """ https://developer.freshdesk.com/api/#list_all_groups @@ -190,6 +202,7 @@ class Groups(Stream): tap_stream_id = 'groups' path = 'groups' + class Roles(Stream): """ https://developer.freshdesk.com/api/#list_all_roles @@ -197,7 +210,12 @@ class Roles(Stream): tap_stream_id = 'roles' path = 'roles' + class DateFilteredStream(Stream): + """ + Base class for all the streams that can be filtered by date. + """ + def sync_obj(self, state, start_date, client, catalog, selected_streams, streams_to_sync, predefined_filter=None): """ The overridden sync_obj() method to fetch the records with different filters. 
@@ -207,19 +225,20 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams for each_filter in self.filters: # Update child bookmark to original_state for child in filter(lambda s: s in selected_streams, self.children): - singer.write_bookmark(state, child, "updated_at", get_bookmark(dup_state, child, "updated_at", start_date)) + singer.write_bookmark(state, child, "updated_at", get_bookmark( + dup_state, child, "updated_at", start_date)) super().sync_obj(state, start_date, client, catalog, selected_streams, streams_to_sync, each_filter) # Update the max child bookmarks dictionary with the maximum from the child and the existing bookmark - max_child_bms.update({child: max(max_child_bms.get(child, ""), get_bookmark(state, child, "updated_at", start_date)) - for child in self.children - if child in selected_streams}) + max_child_bms.update({child: max(max_child_bms.get(child, ""), get_bookmark( + state, child, "updated_at", start_date)) for child in self.children if child in selected_streams}) # Write the child stream bookmarks with the max value found for child, bm in max_child_bms.items(): singer.write_bookmark(state, child, "updated_at", bm) + class Tickets(DateFilteredStream): """ https://developer.freshdesk.com/api/#list_all_tickets @@ -238,6 +257,7 @@ class Tickets(DateFilteredStream): filter_keyword = 'filter' filters = [None, 'deleted', 'spam'] + class Contacts(DateFilteredStream): """ https://developer.freshdesk.com/api/#list_all_contacts @@ -251,6 +271,9 @@ class Contacts(DateFilteredStream): class ChildStream(Stream): + """ + Base class for all the child streams. + """ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams_to_sync, predefined_filter=None): """ @@ -262,7 +285,8 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams current_time = dt.strftime(dt.now(), DATETIME_FMT) # Get the min bookmark from the parent and the child streams - min_bookmark = get_min_bookmark(self.tap_stream_id, selected_streams, current_time, start_date, state, self.replication_keys[0], None) + min_bookmark = get_min_bookmark(self.tap_stream_id, selected_streams, current_time, + start_date, state, self.replication_keys[0], None) max_bookmark = min_bookmark params['page'] = 1 self.paginate = True @@ -274,10 +298,12 @@ def sync_obj(self, state, start_date, client, catalog, selected_streams, streams self.paginate = len(data) >= client.page_size params['page'] += 1 # Write the records based on the bookmark and return the max_bookmark for the page - bookmark, _ = self.write_records(catalog, state, selected_streams, start_date, data, max_bookmark, client, streams_to_sync, None) + bookmark, _ = self.write_records(catalog, state, selected_streams, start_date, + data, max_bookmark, client, streams_to_sync, None) max_bookmark = max(max_bookmark, bookmark) return max_bookmark + class Conversations(ChildStream): """ https://developer.freshdesk.com/api/#list_all_ticket_notes @@ -295,6 +321,7 @@ class SatisfactionRatings(ChildStream): path = 'tickets/{}/satisfaction_ratings' parent = 'tickets' + class TimeEntries(ChildStream): """ https://developer.freshdesk.com/api/#list_all_ticket_timeentries diff --git a/tap_freshdesk/sync.py b/tap_freshdesk/sync.py index 92521bb..0c604fb 100644 --- a/tap_freshdesk/sync.py +++ b/tap_freshdesk/sync.py @@ -3,6 +3,7 @@ LOGGER = singer.get_logger() + def write_schemas(stream_id, catalog, selected_streams): """ Write the schemas for each stream. 
@@ -11,12 +12,13 @@ def write_schemas(stream_id, catalog, selected_streams): if stream_id in selected_streams: # Get catalog object for a particular stream. - stream = [cat for cat in catalog['streams'] if cat['tap_stream_id'] == stream_id ][0] + stream = [cat for cat in catalog['streams'] if cat['tap_stream_id'] == stream_id][0] singer.write_schema(stream_id, stream['schema'], stream['key_properties']) for child in stream_obj.children: write_schemas(child, catalog, selected_streams) + def get_selected_streams(catalog): ''' Gets selected streams. Checks schema's 'selected' @@ -28,10 +30,11 @@ def get_selected_streams(catalog): stream_metadata = stream['metadata'] for entry in stream_metadata: # Stream metadata will have an empty breadcrumb - if not entry['breadcrumb'] and entry['metadata'].get('selected',None): + if not entry['breadcrumb'] and entry['metadata'].get('selected', None): selected_streams.append(stream['tap_stream_id']) return selected_streams + def update_currently_syncing(state, stream_name): """ Updates currently syncing stream in the state. @@ -42,6 +45,7 @@ def update_currently_syncing(state, stream_name): singer.set_currently_syncing(state, stream_name) singer.write_state(state) + def get_ordered_stream_list(currently_syncing, streams_to_sync): """ Get an ordered list of remaining streams to sync other streams followed by synced streams. @@ -52,6 +56,7 @@ def get_ordered_stream_list(currently_syncing, streams_to_sync): stream_list = stream_list[index:] + stream_list[:index] return stream_list + def get_stream_to_sync(selected_streams): """ Get the streams for which the sync function should be called @@ -74,6 +79,7 @@ def get_stream_to_sync(selected_streams): streams_to_sync.append(parent_stream) return streams_to_sync + def sync(client, config, state, catalog): """ Sync selected streams. 
@@ -95,6 +101,6 @@ def sync(client, config, state, catalog): update_currently_syncing(state, stream) stream_obj.sync_obj(state, config["start_date"], client, catalog['streams'], - selected_streams, streams_to_sync) + selected_streams, streams_to_sync) singer.write_state(state) update_currently_syncing(state, None) diff --git a/tests/unittests/test_bookmark_handling.py b/tests/unittests/test_bookmark_handling.py index ffdba5c..b33a8c3 100644 --- a/tests/unittests/test_bookmark_handling.py +++ b/tests/unittests/test_bookmark_handling.py @@ -4,6 +4,7 @@ START_DATE = '2022-09-00T00:00:00.000000Z' + class TestGetMinBookmark(unittest.TestCase): """ Test `get_min_bookmark` method of the stream class @@ -20,6 +21,7 @@ class TestGetMinBookmark(unittest.TestCase): } @parameterized.expand([ + # ["test_name", "selected_streams", "state", "expected_bookmark"] ['test_parent_only_with_state', ['tickets'], {'bookmarks': {'tickets': {'updated_at': '2022-08-30T00:00:00.000000Z'}}}, '2022-08-30T00:00:00.000000Z'], ['test_child_only_with_state', ['conversations'], {'bookmarks': {'conversations': {'updated_at': '2022-08-30T00:00:00.000000Z'}}}, '2022-08-30T00:00:00.000000Z'], ['test_parent_only_without_state', ['tickets'], {}, START_DATE], @@ -27,16 +29,16 @@ class TestGetMinBookmark(unittest.TestCase): ['test_min_parent_bookmark_single_child', ['tickets', 'conversations'], {'bookmarks': {'tickets': {'updated_at': '2022-07-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-08-30T00:00:00.000000Z'}}}, '2022-07-30T00:00:00.000000Z'], ['test_min_child_bookmark_single_child', ['tickets', 'conversations'], - {'bookmarks': {'tickets': {'updated_at': '2022-08-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-07-30T00:00:00.000000Z'}}}, '2022-07-30T00:00:00.000000Z'], + {'bookmarks': {'tickets': {'updated_at': '2022-08-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-07-30T00:00:00.000000Z'}}}, '2022-07-30T00:00:00.000000Z'], ['test_min_child_bookmark_multiple_child', ['tickets', 'conversations', 'time_entries'], - {'bookmarks': {'tickets': {'updated_at': '2022-09-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-09-30T00:00:00.000000Z'}}}, START_DATE], + {'bookmarks': {'tickets': {'updated_at': '2022-09-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-09-30T00:00:00.000000Z'}}}, START_DATE], ['test_multiple_child_only_bookmark', ['tickets', 'conversations', 'time_entries'], - {'bookmarks': {'time_entries': {'updated_at': '2022-09-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-09-30T00:00:00.000000Z'}}}, START_DATE], + {'bookmarks': {'time_entries': {'updated_at': '2022-09-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-09-30T00:00:00.000000Z'}}}, START_DATE], ['test_multiple_child_bookmark', ['tickets', 'conversations', 'time_entries'], - {'bookmarks': {'time_entries': {'updated_at': '2022-06-30T00:00:00.000000Z'}, 'tickets': {'updated_at': '2022-08-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-11-30T00:00:00.000000Z'}}}, '2022-06-30T00:00:00.000000Z'] + {'bookmarks': {'time_entries': {'updated_at': '2022-06-30T00:00:00.000000Z'}, 'tickets': {'updated_at': '2022-08-30T00:00:00.000000Z'}, 'conversations': {'updated_at': '2022-11-30T00:00:00.000000Z'}}}, '2022-06-30T00:00:00.000000Z'] ]) - def test_min_bookmark(self, name, selected_streams, state, expected_bookmark): + def test_min_bookmark(self, test_name, selected_streams, state, expected_bookmark): """ Test that `get_min_bookmark` method returns the minimum bookmark 
from the parent and its corresponding child bookmarks. """ @@ -59,7 +61,7 @@ def test_get_schema(self): ] expected_schema = {"tap_stream_id": "agents"} - # Verify returned schema is same as exected schema + # Verify returned schema is same as expected schema self.assertEqual(get_schema(catalog, "agents"), expected_schema) @@ -69,15 +71,16 @@ class TestWriteBookmark(unittest.TestCase): """ @parameterized.expand([ - ["stream_not_selected", "agents", False, {"bookmarks": {}}], - ["stream_not_selected", "groups", True, {"bookmarks": {"groups": {"updated_at": "BOOKMARK_VALUE"}}}], + # ["test_name", "stream", "expected_state"] + ["stream_not_selected", "agents", {"bookmarks": {}}], + ["stream_not_selected", "groups", {"bookmarks": {"groups": {"updated_at": "BOOKMARK_VALUE"}}}], ]) - def test_write_bookmark(self, name, stream, is_called, expected_state): + def test_write_bookmark(self, test_name, stream, expected_state): """ Test that bookmark is written only if the stream is selected """ state = {"bookmarks": {}} write_bookmark(stream, ["roles", "groups"], "BOOKMARK_VALUE", state) - + # Verify that the final state is as expected self.assertEqual(state, expected_state) diff --git a/tests/unittests/test_check_access_token.py b/tests/unittests/test_check_access_token.py index aadaa2d..8d60121 100644 --- a/tests/unittests/test_check_access_token.py +++ b/tests/unittests/test_check_access_token.py @@ -11,7 +11,7 @@ class TestAccessToken(unittest.TestCase): @mock.patch("tap_freshdesk.client.FreshdeskClient.request") def test_access_token(self, mock_request): """ - Test that to check access token a request call is made. + Test that to check the access token a request call is made. """ config = {"domain": "sampleDomain"} _client = client.FreshdeskClient(config) diff --git a/tests/unittests/test_client.py b/tests/unittests/test_client.py index 9389615..de677a8 100644 --- a/tests/unittests/test_client.py +++ b/tests/unittests/test_client.py @@ -4,7 +4,8 @@ import requests import json -def get_response(status_code, json_resp={}, headers = None): + +def get_response(status_code, json_resp={}, headers=None): """ Returns mock response """ diff --git a/tests/unittests/test_currently_syncing.py b/tests/unittests/test_currently_syncing.py index c37cc52..8cd4db1 100644 --- a/tests/unittests/test_currently_syncing.py +++ b/tests/unittests/test_currently_syncing.py @@ -1,18 +1,19 @@ import unittest -from unittest import mock -from tap_freshdesk.sync import (update_currently_syncing, - get_ordered_stream_list) +from tap_freshdesk.sync import update_currently_syncing, get_ordered_stream_list + class TestGetOrderedStreamList(unittest.TestCase): """ Test `get_ordered_stream_list` function to get ordered list od streams """ - streams_to_sync = ["agents", "companies", "tickets", "conversations", "groups", "satisfaction_ratings", "time_entries"] + streams_to_sync = ["agents", "companies", "tickets", + "conversations", "groups", "satisfaction_ratings", "time_entries"] def test_currently_syncing_not_in_list(self): """Test if currently syncing is not available in `streams_to_sync` list, function returns sorted streams_to_sync list.""" - expected_list = ["agents", "companies", "conversations", "groups", "satisfaction_ratings", "tickets", "time_entries"] + expected_list = ["agents", "companies", "conversations", + "groups", "satisfaction_ratings", "tickets", "time_entries"] final_list = get_ordered_stream_list("roles", self.streams_to_sync) # Verify with expected ordered list of streams @@ -20,7 +21,8 @@ def 
test_currently_syncing_not_in_list(self): def test_for_interrupted_sync(self): """Test when the sync was interrupted, the function returns ordered list of streams starting with 'currently_syncing' stream.""" - expected_list = ["groups", "satisfaction_ratings", "tickets", "time_entries", "agents", "companies", "conversations"] + expected_list = ["groups", "satisfaction_ratings", "tickets", + "time_entries", "agents", "companies", "conversations"] final_list = get_ordered_stream_list("groups", self.streams_to_sync) # Verify with expected ordered list of streams @@ -28,17 +30,20 @@ def test_for_interrupted_sync(self): def test_for_completed_sync(self): """Test when sync was not interrupted, the function returns sorted streams_to_sync list.""" - expected_list = ["agents", "companies", "conversations", "groups", "satisfaction_ratings", "tickets", "time_entries"] + expected_list = ["agents", "companies", "conversations", + "groups", "satisfaction_ratings", "tickets", "time_entries"] final_list = get_ordered_stream_list(None, self.streams_to_sync) # Verify with expected ordered list of streams self.assertEqual(final_list, expected_list) + class TestUpdateCurrentlySyncing(unittest.TestCase): """ Test `update_currently_syncing` function of sync. """ + def test_update_syncing_stream(self): """Test for adding currently syncing stream in state.""" state = {"currently_syncing": "groups"} @@ -53,4 +58,4 @@ def test_flush_currently_syncing(self): update_currently_syncing(state, None) # Verify with expected state - self.assertEqual(state, {}) \ No newline at end of file + self.assertEqual(state, {}) diff --git a/tests/unittests/test_main.py b/tests/unittests/test_main.py index d56cdef..d5a802e 100644 --- a/tests/unittests/test_main.py +++ b/tests/unittests/test_main.py @@ -4,15 +4,17 @@ from tap_freshdesk import main from tap_freshdesk.discover import discover + class MockArgs: """Mock args object class""" - - def __init__(self, config = None, catalog = None, state = {}, discover = False) -> None: - self.config = config + + def __init__(self, config=None, catalog=None, state={}, discover=False) -> None: + self.config = config self.catalog = catalog self.state = state self.discover = discover + @mock.patch("tap_freshdesk.FreshdeskClient") @mock.patch("singer.utils.parse_args") class TestDiscoverMode(unittest.TestCase): @@ -26,7 +28,8 @@ class TestDiscoverMode(unittest.TestCase): def test_discover_with_config(self, mock_discover, mock_args, mock_verify_access): """Test `_discover` function is called for discover mode""" mock_discover.return_value = Catalog([]) - mock_args.return_value = MockArgs(discover = True, config = self.mock_config) + mock_args.return_value = MockArgs( + discover=True, config=self.mock_config) main() # Verify that `discover` was called @@ -48,7 +51,8 @@ class TestSyncMode(unittest.TestCase): def test_sync_with_catalog(self, mock_discover, mock_sync, mock_args, mock_check_access_token): """Test sync mode with catalog given in args""" - mock_args.return_value = MockArgs(config=self.mock_config, catalog=Catalog.from_dict(self.mock_catalog)) + mock_args.return_value = MockArgs(config=self.mock_config, + catalog=Catalog.from_dict(self.mock_catalog)) main() # Verify `_sync` is called with expected arguments @@ -61,7 +65,7 @@ def test_sync_with_catalog(self, mock_discover, mock_sync, mock_args, mock_check def test_sync_without_catalog(self, mock_discover, mock_sync, mock_args, mock_check_access_token): """Test sync mode without catalog given in args""" - mock_discover.return_value = 
catalog=Catalog.from_dict(self.mock_catalog) + mock_discover.return_value = Catalog.from_dict(self.mock_catalog) mock_args.return_value = MockArgs(config=self.mock_config) main() @@ -74,12 +78,15 @@ def test_sync_without_catalog(self, mock_discover, mock_sync, mock_args, mock_ch def test_sync_with_state(self, mock_sync, mock_args, mock_check_access_token): """Test sync mode with state given in args""" mock_state = {"bookmarks": {"projec ts": ""}} - mock_args.return_value = MockArgs(config=self.mock_config, catalog=Catalog.from_dict(self.mock_catalog), state=mock_state) + mock_args.return_value = MockArgs(config=self.mock_config, + catalog=Catalog.from_dict(self.mock_catalog), + state=mock_state) main() # Verify `_sync` is called with expected arguments mock_sync.assert_called_with(mock.ANY, self.mock_config, mock_state, self.mock_catalog) + class TestDiscover(unittest.TestCase): """Test `discover` function.""" diff --git a/tests/unittests/test_page_size.py b/tests/unittests/test_page_size.py index 76b0572..846b439 100644 --- a/tests/unittests/test_page_size.py +++ b/tests/unittests/test_page_size.py @@ -14,6 +14,7 @@ class TestPageSizeValue(unittest.TestCase): @parameterized.expand([ + # ["page_size_value", "expected_value"] [PAGE_SIZE_INT, PAGE_SIZE_INT], [PAGE_SIZE_STR_INT, PAGE_SIZE_INT], [PAGE_SIZE_STR_FLOAT, PAGE_SIZE_INT], @@ -32,6 +33,7 @@ def test_page_size_for_valid_values(self, page_size_value, expected_value): self.assertEqual(client.page_size, expected_value) @parameterized.expand([ + # ["page_size_value"] [PAGE_SIZE_INVALID_STRING], [PAGE_SIZE_STR_ZERO], [PAGE_SIZE_ZERO], diff --git a/tests/unittests/test_streams.py b/tests/unittests/test_streams.py index 0151e87..0ac97ee 100644 --- a/tests/unittests/test_streams.py +++ b/tests/unittests/test_streams.py @@ -1,16 +1,17 @@ import unittest from unittest import mock from parameterized import parameterized -from tap_freshdesk.streams import Agents, Tickets, get_min_bookmark +from tap_freshdesk.streams import Agents, Tickets + class TestSyncObj(unittest.TestCase): """ - Test `sync_obj` mehtod of stream. + Test `sync_obj` method of stream. 
""" start_date = "2019-06-01T00:00:00Z" only_parent_response = [ - [{"id": i, "updated_at": f"2020-0{i}-01T00:00:00Z"} for i in [1,5,2]], # Tickets Response + [{"id": i, "updated_at": f"2020-0{i}-01T00:00:00Z"} for i in [1, 5, 2]], # Tickets Response [{"id": "33", "updated_at": f"2020-03-01T00:00:00Z"}], # Deleted tickets Response [{"id": "55", "updated_at": f"2020-04-01T00:00:00Z"}], # Spam tickets Response ] @@ -20,11 +21,11 @@ class TestSyncObj(unittest.TestCase): "tickets_spam": "2020-04-01T00:00:00Z", } with_child_response = [ - [{"id": i, "updated_at": f"2020-0{i}-01T00:00:00Z"} for i in [1,5,2]], # Tickets Response - [{"id": i, "updated_at": f"2020-0{i}-01T00:00:00Z"} for i in [2,4]], # conversations Response + [{"id": i, "updated_at": f"2020-0{i}-01T00:00:00Z"} for i in [1, 5, 2]], # Tickets Response + [{"id": i, "updated_at": f"2020-0{i}-01T00:00:00Z"} for i in [2, 4]], # conversations Response [{"id": "33", "updated_at": "2020-03-01T00:00:00Z"}], # conversations Response [{"id": "55", "updated_at": "2020-04-01T00:00:00Z"}], # conversations Response - [],[] # Deleted/Spam tickets response + [], [] # Deleted/Spam tickets response ] written_states_2 = { "conversations": "2020-04-01T00:00:00Z", @@ -34,28 +35,34 @@ class TestSyncObj(unittest.TestCase): "conversations": "2020-04-01T00:00:00Z", } expected_state_1 = { - "conversations": {"updated_at": "2020-04-01T00:00:00Z"}, - "tickets": {"updated_at": "2020-03-15T00:00:00Z"}, - "tickets_deleted": {"updated_at": "2020-05-01T00:00:00Z"}, - "tickets_spam": {"updated_at": "2020-04-01T00:00:00Z"} - } + "conversations": {"updated_at": "2020-04-01T00:00:00Z"}, + "tickets": {"updated_at": "2020-03-15T00:00:00Z"}, + "tickets_deleted": {"updated_at": "2020-05-01T00:00:00Z"}, + "tickets_spam": {"updated_at": "2020-04-01T00:00:00Z"} + } expected_state_2 = {'conversations': {'updated_at': '2020-04-01T00:00:00Z'}, 'tickets': {'updated_at': '2019-06-01T00:00:00Z'}, 'tickets_deleted': {'updated_at': '2020-05-01T00:00:00Z'}, 'tickets_spam': {'updated_at': '2020-04-01T00:00:00Z'}} expected_state_3 = { - **expected_state_1, - "tickets": {"updated_at": "2020-03-16T00:00:00Z"}, - } + **expected_state_1, + "tickets": {"updated_at": "2020-03-16T00:00:00Z"}, + } @parameterized.expand([ - ["parent_selected", ["tickets"], ["tickets"], only_parent_response, 5, written_states_1], - ["child_selected", ["conversations"], ["tickets", "conversations"], with_child_response, 4, written_states_2], - ["parent_child_both_selected", ["tickets", "conversations"], ["tickets", "conversations"], with_child_response, 7, written_states_3], + # ["test_name", "selected_streams", "streams_to_sync", "responses", "written_records", "written_states"] + ["parent_selected", ["tickets"], ["tickets"], + only_parent_response, 5, written_states_1], + ["child_selected", ["conversations"], ["tickets", "conversations"], + with_child_response, 4, written_states_2], + ["parent_child_both_selected", ["tickets", "conversations"], ["tickets", "conversations"], + with_child_response, 7, written_states_3], ]) @mock.patch("singer.write_record") @mock.patch("singer.write_bookmark") - def test_stream(self, name, selected_streams, streams_to_sync, responses, written_records, written_states, mock_write_bookmark, mock_write_record): + def test_stream_sync_obj( + self, test_name, selected_streams, streams_to_sync, responses, written_records, written_states, + mock_write_bookmark, mock_write_record): """ Test that stream is writing records and bookmarks only if selected. 
""" @@ -66,8 +73,8 @@ def test_stream(self, name, selected_streams, streams_to_sync, responses, writte client.page_size = 100 client.request.side_effect = responses catalog = [ - {"schema":{}, "tap_stream_id": "tickets", "metadata": []}, - {"schema":{}, "tap_stream_id": "conversations", "metadata": []} + {"schema": {}, "tap_stream_id": "tickets", "metadata": []}, + {"schema": {}, "tap_stream_id": "conversations", "metadata": []} ] stream.sync_obj(state, self.start_date, client, catalog, selected_streams, streams_to_sync) @@ -79,42 +86,49 @@ def test_stream(self, name, selected_streams, streams_to_sync, responses, writte for stream, bookmark in written_states.items(): mock_write_bookmark.assert_any_call({}, stream, "updated_at", bookmark) - @parameterized.expand([ + # ["test_name", "state", "expected_state", "written_records"] ["without_state", dict(), expected_state_1, 13], ["with_parent_state", {"bookmarks": {"tickets": {"updated_at": "2020-03-16T00:00:00Z"}}}, expected_state_2, 10], - ["with_child_state", {"bookmarks": {"conversations": {"updated_at": "2020-03-23T00:00:00Z"}}}, expected_state_1, 8], - ["with_both_state", {"bookmarks": {"tickets": {"updated_at": "2020-03-16T00:00:00Z"}, "conversations": {"updated_at": "2020-03-23T00:00:00Z"}}}, expected_state_3, 5], + ["with_child_state", {"bookmarks": {"conversations": { + "updated_at": "2020-03-23T00:00:00Z"}}}, expected_state_1, 8], + ["with_both_state", {"bookmarks": {"tickets": {"updated_at": "2020-03-16T00:00:00Z"}, + "conversations": {"updated_at": "2020-03-23T00:00:00Z"}}}, expected_state_3, 5], ]) @mock.patch("singer.write_record") - def test_parent_child_both_selected(self, name, state, expected_state, written_records, mock_write_record): + def test_parent_child_both_selected(self, test_name, state, expected_state, written_records, mock_write_record): """ Test parent and child streams both selected in given conditions: - - Without state - - With only parent bookmark - - With only child bookmark - - With both parent and child bookmark + - Without a state, all the records will be written. + - With only parent bookmark parent records with replication value < bookmark will not be written. + - With only a child bookmark child records with replication value < bookmark will not be written. + - With both parent and child bookmarks, both follow bookmarks. 
""" stream = Tickets() client = mock.Mock() client.base_url = "" client.page_size = 100 client.request.side_effect = [ - [{"id": i, "updated_at": f"2020-03-{i}T00:00:00Z"} for i in [11,15,12]], # Tickets Response - [{"id": 10+i, "updated_at": f"2020-03-{i}T00:00:00Z"} for i in [13,24]], # conversations Response + [{"id": i, "updated_at": f"2020-03-{i}T00:00:00Z"} for i in [11, 15, 12]], # Tickets Response + [{"id": 10+i, "updated_at": f"2020-03-{i}T00:00:00Z"} for i in [13, 24]], # conversations Response [{"id": 13, "updated_at": "2020-03-01T00:00:00Z"}], # conversations Response [{"id": 95, "updated_at": "2020-04-01T00:00:00Z"}], # conversations Response - [{"id": 73, "updated_at": "2020-05-01T00:00:00Z"}], # Deleted tickets response - [{"id": 30+i, "updated_at": f"2020-03-{i}T00:00:00Z"}for i in [22,10]], # conversations response + [{"id": 73, "updated_at": "2020-05-01T00:00:00Z"}], # Deleted tickets response + [{"id": 30+i, "updated_at": f"2020-03-{i}T00:00:00Z"}for i in [22, 10]], # conversations response [{"id": 43, "updated_at": "2020-04-01T00:00:00Z"}], # Spam tickets response - [{"id": 50+i, "updated_at": f"2020-03-{i}T00:00:00Z"}for i in [12,25]], # conversations response + [{"id": 50+i, "updated_at": f"2020-03-{i}T00:00:00Z"}for i in [12, 25]], # conversations response ] catalog = [ - {"schema":{}, "tap_stream_id": "tickets", "metadata": []}, - {"schema":{}, "tap_stream_id": "conversations", "metadata": []} + {"schema": {}, "tap_stream_id": "tickets", "metadata": []}, + {"schema": {}, "tap_stream_id": "conversations", "metadata": []} ] - stream.sync_obj(state, self.start_date, client, catalog, ["tickets", "conversations"], ["tickets", "conversations"]) + stream.sync_obj(state=state, + start_date=self.start_date, + client=client, + catalog=catalog, + selected_streams=["tickets", "conversations"], + streams_to_sync=["tickets", "conversations"]) self.assertEqual(mock_write_record.call_count, written_records) self.assertDictEqual(state, {"bookmarks": expected_state}) @@ -126,17 +140,22 @@ class TestSyncTransformDict(unittest.TestCase): stream = Agents() expected_list_1 = [{"name": "Agency", "value": "Justice League"}, - {"name": "Department", "value": "Superhero"}] + {"name": "Department", "value": "Superhero"}] expected_list_2 = [{"key": "Agency", "data": "Justice League"}, - {"key": "Department", "data": "Superhero"}] + {"key": "Department", "data": "Superhero"}] expected_list_3 = [{"name": "Agency", "value": "justice league"}, - {"name": "Department", "value": "superhero"}] - @parameterized.expand([ - ["coverting_dict_to_list", {"Agency": "Justice League", "Department": "Superhero"}, expected_list_1, {}], - ["With_custom_keys", {"Agency": "Justice League", "Department": "Superhero"}, expected_list_2, {"key_key":"key", "value_key":"data"}], - ["With_string_value", {"Agency": "Justice League", "Department": "Superhero"}, expected_list_3, {"force_str": True}], + {"name": "Department", "value": "superhero"}] + + @ parameterized.expand([ + # ["test_name", "dictionary", "expected_list", "kwargs"] + ["coverting_dict_to_list", {"Agency": "Justice League", "Department": "Superhero"}, + expected_list_1, {}], + ["With_custom_keys", {"Agency": "Justice League", "Department": "Superhero"}, + expected_list_2, {"key_key": "key", "value_key": "data"}], + ["With_string_value", {"Agency": "Justice League", "Department": "Superhero"}, + expected_list_3, {"force_str": True}], ]) - def test_transform(self, name, dictionary, expected_list, kwargs): + def test_transform(self, test_name, dictionary, 
expected_list, kwargs):
         """
         Test that the dictionary is transformed as per given conditions:
         - Value is a lowercase string when force_str: True
diff --git a/tests/unittests/test_sync.py b/tests/unittests/test_sync.py
index 17da4fc..6f1e434 100644
--- a/tests/unittests/test_sync.py
+++ b/tests/unittests/test_sync.py
@@ -5,24 +5,24 @@
                                     get_stream_to_sync, sync)
 
 
-def get_stream_catalog(stream_name, is_selected = False):
+def get_stream_catalog(stream_name, is_selected=False):
     """Return catalog for stream"""
     return {
-            "schema":{},
-            "tap_stream_id": stream_name,
-            "metadata": [
-                {
-                    "breadcrumb": [],
-                    "metadata":{"selected": is_selected}
-                }
-            ],
-            "key_properties": []
+        "schema": {},
+        "tap_stream_id": stream_name,
+        "metadata": [
+            {
+                "breadcrumb": [],
+                "metadata":{"selected": is_selected}
             }
+        ],
+        "key_properties": []
+    }
 
 
 def get_catalog(parent=False, child=False):
     """Return complete catalog"""
-    
+
     return {
         "streams": [
             get_stream_catalog("agents"),
@@ -35,7 +35,6 @@ def get_catalog(parent=False, child=False):
     }
 
 
-
 class TestSyncFunctions(unittest.TestCase):
     """
     Test `sync` function.
@@ -43,17 +42,24 @@ class TestSyncFunctions(unittest.TestCase):
 
     # NOTE: For `tickets` stream `sync_obj` is called 3 times
     @parameterized.expand([
-        ["only_parent_selected", get_catalog(parent=True), ["companies", "tickets", "groups"], 5],
-        ["only_child_selected", get_catalog(child=True), ["conversations", "time_entries"], 3],
-        ["both_selected", get_catalog(parent=True, child=True), ["companies", "tickets", "groups", "conversations", "time_entries"], 5],
-        ["No_streams_selected", get_catalog(), [], 0],
+        # ["test_name", "mock_catalog", "selected_streams", "synced_streams"]
+        ["only_parent_selected", get_catalog(parent=True),
+         ["companies", "tickets", "groups"], 5],
+        ["only_child_selected", get_catalog(child=True),
+         ["conversations", "time_entries"], 3],
+        ["both_selected", get_catalog(parent=True, child=True),
+         ["companies", "tickets", "groups", "conversations", "time_entries"], 5],
+        ["No_streams_selected", get_catalog(),
+         [], 0],
     ])
     @mock.patch("singer.write_state")
     @mock.patch("singer.write_schema")
     @mock.patch("tap_freshdesk.streams.Stream.sync_obj")
-    def test_sync(self, name, mock_catalog, selected_streams, synced_streams, mock_sync_endpoint, mock_write_schemas, mock_write_state):
+    def test_sync(self, test_name, mock_catalog, selected_streams, synced_streams,
+                  mock_sync_endpoint, mock_write_schemas, mock_write_state):
         """
-        Test sync function.
+        Test that for selected child streams the parent's `sync_obj` is called and
+        the schema is written only for selected streams.
         """
         client = mock.Mock()
         sync(client, {'start_date': ""}, {}, mock_catalog)
@@ -67,9 +73,9 @@ def test_sync(self, name, mock_catalog, selected_streams, synced_streams, mock_s
         self.assertEqual(mock_sync_endpoint.call_count, synced_streams)
 
 
-class TestWriteSchemas(unittest.TestCase):
+class TestUtilsFunction(unittest.TestCase):
     """
-    Test `write_schemas` function.
+    Test functions used in `sync`.
""" mock_catalog = {"streams": [ @@ -79,32 +85,28 @@ class TestWriteSchemas(unittest.TestCase): ]} @parameterized.expand([ + # [test_name, selected_streams, mock_write_schema] ["parents_selected", ["tickets"]], ["child_selected", ["time_entries"]], ["parent_and_child_selected", ["tickets", "conversations"]], ]) @mock.patch("singer.write_schema") - def test_write_schema(self, name, selected_streams, mock_write_schema): + def test_write_schema(self, test_name, selected_streams, mock_write_schema): """ - Test that only schema is written for only selected streams. + Test that `write_schemas` function writes schema of only selected streams. """ write_schemas("tickets", self.mock_catalog, selected_streams) for stream in selected_streams: # Verify write_schema function is called. mock_write_schema.assert_any_call(stream, mock.ANY, mock.ANY) - -class TestGetStreamsToSync(unittest.TestCase): - """ - Testcase for `get_stream_to_sync` in sync. - """ - @parameterized.expand([ + # ["test_name", "selected_streams", "expected_streams"] ['test_parent_selected', ["tickets"], ["tickets"]], ['test_child_selected', ["conversations", "satisfaction_ratings"], ["tickets"]], ['test_both_selected', ["conversations", "roles", "tickets"], ["roles", "tickets"]] ]) - def test_sync_streams(self, name, selected_streams, expected_streams): + def test_get_sync_streams(self, test_name, selected_streams, expected_streams): """ Test that if an only child is selected in the catalog, then `get_stream_to_sync` returns the parent streams if selected stream is child. @@ -114,12 +116,6 @@ def test_sync_streams(self, name, selected_streams, expected_streams): # Verify that the expected list of streams is returned self.assertEqual(sync_streams, expected_streams) - -class TestGetSelectedStreams(unittest.TestCase): - """ - Testcase for `get_selected_streams` in sync. - """ - def test_streams_selection(self): """ Test that `get_selected_streams` returns the list of selected streams.