Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TDL-20356 update function based to class based #51

Open
wants to merge 37 commits into
base: TDL-20357-add-missing-tap-tester-tests
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
7f23929
TDL-20356 implement function based to class based
namrata270998 Aug 22, 2022
72d89a6
added comments to the client
namrata270998 Aug 22, 2022
42305fd
updated config.yml
namrata270998 Aug 22, 2022
d1963de
added sync for normal streams
namrata270998 Aug 23, 2022
a254e6d
added pagination
namrata270998 Aug 23, 2022
7bce692
added parent child
namrata270998 Aug 23, 2022
d82d65b
updated the parent child code
namrata270998 Aug 23, 2022
8c197ac
updated parent child code
namrata270998 Aug 24, 2022
314d916
added currently syncing and updated bookmark logic
namrata270998 Aug 25, 2022
0785754
updated child bookmarking logic
namrata270998 Aug 25, 2022
445a3fd
added code comments and updated unittests
namrata270998 Aug 25, 2022
c316fdf
updated streams.py file
namrata270998 Aug 25, 2022
968ebdc
removed unused code
namrata270998 Aug 25, 2022
c11a8f4
make pylint happy
namrata270998 Aug 25, 2022
69fa2a5
make pylint happy
namrata270998 Aug 25, 2022
1837bf3
made pylint happy
namrata270998 Aug 25, 2022
620079d
Merge branch 'TDL-20357-add-missing-tap-tester-tests' of https://gith…
namrata270998 Aug 25, 2022
2adbed7
resolved issue of params getting used for other streams
namrata270998 Aug 25, 2022
4b8394b
fixed the issue for bookmarking of child streams and added more code …
namrata270998 Aug 25, 2022
7085c84
removed commented code and optimized
namrata270998 Aug 26, 2022
2970a04
Merge branch 'TDL-20357-add-missing-tap-tester-tests' of https://gith…
namrata270998 Aug 26, 2022
38df735
make pylint happy
namrata270998 Aug 26, 2022
c294a79
added currently syncing as None at the end of the sync and added inte…
namrata270998 Aug 26, 2022
1229a76
increased code coverage
namrata270998 Aug 26, 2022
d16cc36
Resolved review comments.
prijendev Aug 29, 2022
97aa802
Merge branch 'TDL-20356-update-function-based-to-class-based' of http…
prijendev Aug 29, 2022
69cb3ab
Resolved pylint errors.
prijendev Aug 30, 2022
805c830
used singer utils ratelimit and resolved review comments
namrata270998 Sep 6, 2022
042885a
Resolved pylint error.
prijendev Sep 12, 2022
f409ae6
Resolved parent child sync issue.
prijendev Sep 12, 2022
b44017c
Revert "Resolved pylint error."
prijendev Sep 12, 2022
5a8a71a
Updated unit test for get_min_bookmark function.
prijendev Sep 12, 2022
a9daace
Added configurable pagination parameter.
prijendev Sep 15, 2022
36ffa95
Updated test_streams unit test case.
prijendev Sep 15, 2022
2ed56db
Merge remote-tracking branch 'origin/TDL-20357-add-missing-tap-tester…
prijendev Sep 16, 2022
c308dab
Merge branch 'TDL-20357-add-missing-tap-tester-tests' into TDL-20356-…
prijendev Sep 21, 2022
ef05eaf
Resolved autopep8 errors.
prijendev Sep 21, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,23 @@ jobs:
command: |
source /usr/local/share/virtualenvs/tap-tester/bin/activate
stitch-validate-json tap_freshdesk/schemas/*.json
- run:
name: 'pylint'
command: |
source /usr/local/share/virtualenvs/tap-freshdesk/bin/activate
pylint tap_freshdesk --disable 'missing-module-docstring,missing-function-docstring,missing-class-docstring,line-too-long,invalid-name,too-many-lines,consider-using-f-string,too-many-arguments,too-many-locals'
- run:
name: 'Unit Tests'
command: |
source /usr/local/share/virtualenvs/tap-freshdesk/bin/activate
pip install nose coverage parameterized
nosetests --with-coverage --cover-erase --cover-package=tap_freshdesk --cover-html-dir=htmlcov tests/unittests
coverage html
when: always
- store_test_results:
path: test_output/report.xml
- store_artifacts:
path: htmlcov
- run:
name: 'Integration Tests'
command: |
Expand Down
9 changes: 7 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,15 @@
classifiers=['Programming Language :: Python :: 3 :: Only'],
py_modules=['tap_freshdesk'],
install_requires=[
'singer-python==5.2.3',
'singer-python==5.12.2',
'requests==2.20.0',
'backoff==1.3.2'
'backoff==1.8.0'
],
extras_require={
'dev': [
'pylint',
]
},
entry_points='''
[console_scripts]
tap-freshdesk=tap_freshdesk:main
Expand Down
268 changes: 18 additions & 250 deletions tap_freshdesk/__init__.py
Original file line number Diff line number Diff line change
@@ -1,259 +1,27 @@
#!/usr/bin/env python3

import sys
import time

import backoff
import requests
from requests.exceptions import HTTPError
import singer
from singer import utils
from tap_freshdesk.discover import discover as _discover
from tap_freshdesk.sync import sync as _sync
from tap_freshdesk.client import FreshdeskClient

from tap_freshdesk import utils


REQUIRED_CONFIG_KEYS = ['api_key', 'domain', 'start_date']
PER_PAGE = 100
BASE_URL = "https://{}.freshdesk.com"
CONFIG = {}
STATE = {}

endpoints = {
"tickets": "/api/v2/tickets",
"sub_ticket": "/api/v2/tickets/{id}/{entity}",
"agents": "/api/v2/agents",
"roles": "/api/v2/roles",
"groups": "/api/v2/groups",
"companies": "/api/v2/companies",
"contacts": "/api/v2/contacts",
}

logger = singer.get_logger()
session = requests.Session()


def get_url(endpoint, **kwargs):
return BASE_URL.format(CONFIG['domain']) + endpoints[endpoint].format(**kwargs)


@backoff.on_exception(backoff.expo,
(requests.exceptions.RequestException),
max_tries=5,
giveup=lambda e: e.response is not None and 400 <= e.response.status_code < 500,
factor=2)
@utils.ratelimit(1, 2)
def request(url, params=None):
params = params or {}
headers = {}
if 'user_agent' in CONFIG:
headers['User-Agent'] = CONFIG['user_agent']

req = requests.Request('GET', url, params=params, auth=(CONFIG['api_key'], ""), headers=headers).prepare()
logger.info("GET {}".format(req.url))
resp = session.send(req)

if 'Retry-After' in resp.headers:
retry_after = int(resp.headers['Retry-After'])
logger.info("Rate limit reached. Sleeping for {} seconds".format(retry_after))
time.sleep(retry_after)
return request(url, params)

resp.raise_for_status()

return resp


def get_start(entity):
if entity not in STATE:
STATE[entity] = CONFIG['start_date']

return STATE[entity]


def gen_request(url, params=None):
params = params or {}
params["per_page"] = PER_PAGE
page = 1
while True:
params['page'] = page
data = request(url, params).json()
for row in data:
yield row

if len(data) == PER_PAGE:
page += 1
else:
break


def transform_dict(d, key_key="name", value_key="value", force_str=False):
# Custom fields are expected to be strings, but sometimes the API sends
# booleans. We cast those to strings to match the schema.
rtn = []
for k, v in d.items():
if force_str:
v = str(v).lower()
rtn.append({key_key: k, value_key: v})
return rtn


def sync_tickets():
bookmark_property = 'updated_at'

singer.write_schema("tickets",
utils.load_schema("tickets"),
["id"],
bookmark_properties=[bookmark_property])

singer.write_schema("conversations",
utils.load_schema("conversations"),
["id"],
bookmark_properties=[bookmark_property])

singer.write_schema("satisfaction_ratings",
utils.load_schema("satisfaction_ratings"),
["id"],
bookmark_properties=[bookmark_property])

singer.write_schema("time_entries",
utils.load_schema("time_entries"),
["id"],
bookmark_properties=[bookmark_property])
REQUIRED_CONFIG_KEYS = ["start_date", "domain", "api_key"]

sync_tickets_by_filter(bookmark_property)
sync_tickets_by_filter(bookmark_property, "deleted")
sync_tickets_by_filter(bookmark_property, "spam")


def sync_tickets_by_filter(bookmark_property, predefined_filter=None):
endpoint = "tickets"

state_entity = endpoint
if predefined_filter:
state_entity = state_entity + "_" + predefined_filter

start = get_start(state_entity)

params = {
'updated_since': start,
'order_by': bookmark_property,
'order_type': "asc",
'include': "requester,company,stats"
}

if predefined_filter:
logger.info("Syncing tickets with filter {}".format(predefined_filter))

if predefined_filter:
params['filter'] = predefined_filter

for i, row in enumerate(gen_request(get_url(endpoint), params)):
logger.info("Ticket {}: Syncing".format(row['id']))
row.pop('attachments', None)
row['custom_fields'] = transform_dict(row['custom_fields'], force_str=True)

# get all sub-entities and save them
logger.info("Ticket {}: Syncing conversations".format(row['id']))

try:
for subrow in gen_request(get_url("sub_ticket", id=row['id'], entity="conversations")):
subrow.pop("attachments", None)
subrow.pop("body", None)
if subrow[bookmark_property] >= start:
singer.write_record("conversations", subrow, time_extracted=singer.utils.now())
except HTTPError as e:
if e.response.status_code == 403:
logger.info('Invalid ticket ID requested from Freshdesk {0}'.format(row['id']))
else:
raise

try:
logger.info("Ticket {}: Syncing satisfaction ratings".format(row['id']))
for subrow in gen_request(get_url("sub_ticket", id=row['id'], entity="satisfaction_ratings")):
subrow['ratings'] = transform_dict(subrow['ratings'], key_key="question")
if subrow[bookmark_property] >= start:
singer.write_record("satisfaction_ratings", subrow, time_extracted=singer.utils.now())
except HTTPError as e:
if e.response.status_code == 403:
logger.info("The Surveys feature is unavailable. Skipping the satisfaction_ratings stream.")
else:
raise

try:
logger.info("Ticket {}: Syncing time entries".format(row['id']))
for subrow in gen_request(get_url("sub_ticket", id=row['id'], entity="time_entries")):
if subrow[bookmark_property] >= start:
singer.write_record("time_entries", subrow, time_extracted=singer.utils.now())

except HTTPError as e:
if e.response.status_code == 403:
logger.info("The Timesheets feature is unavailable. Skipping the time_entries stream.")
elif e.response.status_code == 404:
# 404 is being returned for deleted tickets and spam
logger.info("Could not retrieve time entries for ticket id {}. This may be caused by tickets "
"marked as spam or deleted.".format(row['id']))
else:
raise

utils.update_state(STATE, state_entity, row[bookmark_property])
singer.write_record(endpoint, row, time_extracted=singer.utils.now())
singer.write_state(STATE)


def sync_time_filtered(entity):
bookmark_property = 'updated_at'

singer.write_schema(entity,
utils.load_schema(entity),
["id"],
bookmark_properties=[bookmark_property])
start = get_start(entity)

logger.info("Syncing {} from {}".format(entity, start))
for row in gen_request(get_url(entity)):
if row[bookmark_property] >= start:
if 'custom_fields' in row:
row['custom_fields'] = transform_dict(row['custom_fields'], force_str=True)

utils.update_state(STATE, entity, row[bookmark_property])
singer.write_record(entity, row, time_extracted=singer.utils.now())

singer.write_state(STATE)


def do_sync():
logger.info("Starting FreshDesk sync")

try:
sync_tickets()
sync_time_filtered("agents")
sync_time_filtered("roles")
sync_time_filtered("groups")
# commenting out this high-volume endpoint for now
#sync_time_filtered("contacts")
sync_time_filtered("companies")
except HTTPError as e:
logger.critical(
"Error making request to Freshdesk API: GET %s: [%s - %s]",
e.request.url, e.response.status_code, e.response.content)
sys.exit(1)

logger.info("Completed sync")


def main_impl():
config, state = utils.parse_args(REQUIRED_CONFIG_KEYS)
CONFIG.update(config)
STATE.update(state)
do_sync()
LOGGER = singer.get_logger()


@utils.handle_top_exception(LOGGER)
def main():
try:
main_impl()
except Exception as exc:
logger.critical(exc)
raise exc

args = utils.parse_args(REQUIRED_CONFIG_KEYS)
config = args.config
with FreshdeskClient(config) as client:
if args.discover:
catalog = _discover()
catalog.dump()
else:
catalog = args.catalog \
if args.catalog else _discover()
_sync(client, config, args.state, catalog.to_dict())

if __name__ == '__main__':
if __name__ == "__main__":
main()
63 changes: 63 additions & 0 deletions tap_freshdesk/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import time
import backoff
import requests
import singer
from singer import utils


LOGGER = singer.get_logger()
BASE_URL = "https://{}.freshdesk.com"


class FreshdeskClient:
"""
The client class is used for making REST calls to the Freshdesk API.
"""

def __init__(self, config):
self.config = config
self.session = requests.Session()
self.base_url = BASE_URL.format(config.get("domain"))

def __enter__(self):
self.check_access_token()
return self

def __exit__(self, exception_type, exception_value, traceback):
# Kill the session instance.
self.session.close()

def check_access_token(self):
"""
Check if the access token is valid.
"""
self.request(self.base_url+"/api/v2/roles", {"per_page": 1, "page": 1})

@backoff.on_exception(backoff.expo,
(requests.exceptions.RequestException),
max_tries=5,
giveup=lambda e: e.response is not None and 400 <= e.response.status_code < 500,
factor=2)
@utils.ratelimit(1, 2)
def request(self, url, params=None):
"""
Call rest API and return the response in case of status code 200.
"""
headers = {}
if 'user_agent' in self.config:
headers['User-Agent'] = self.config['user_agent']

req = requests.Request('GET', url, params=params, auth=(self.config['api_key'], ""), headers=headers).prepare()
LOGGER.info("GET %s", req.url)
response = self.session.send(req)

# Call the function again if the rate limit is exceeded
if 'Retry-After' in response.headers:
retry_after = int(response.headers['Retry-After'])
LOGGER.info("Rate limit reached. Sleeping for %s seconds", retry_after)
time.sleep(retry_after)
return self.request(url, params)

response.raise_for_status()

return response.json()
Loading