From 61dd74ca965f9a53a0f088c8faf551751bfee47d Mon Sep 17 00:00:00 2001 From: Dan Mosora Date: Mon, 17 Sep 2018 15:24:38 +0000 Subject: [PATCH 1/5] Increase max size of CSV to system's max int --- tap_salesforce/salesforce/bulk.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tap_salesforce/salesforce/bulk.py b/tap_salesforce/salesforce/bulk.py index b31bff49..24781701 100644 --- a/tap_salesforce/salesforce/bulk.py +++ b/tap_salesforce/salesforce/bulk.py @@ -1,6 +1,7 @@ # pylint: disable=protected-access import csv import json +import sys import time import tempfile import singer @@ -40,6 +41,8 @@ class Bulk(object): bulk_url = "{}/services/async/41.0/{}" def __init__(self, sf): + # Set csv max reading size to the platform's max size available. + csv.field_size_limit(sys.maxsize) self.sf = sf def query(self, catalog_entry, state): From ca42c6a5c0179404e4d090d85227bb459eaaf0be Mon Sep 17 00:00:00 2001 From: Dan Mosora Date: Mon, 17 Sep 2018 17:26:28 +0000 Subject: [PATCH 2/5] Pylint fixes --- .circleci/config.yml | 2 +- tap_salesforce/__init__.py | 12 +++++------- tap_salesforce/salesforce/__init__.py | 5 ++--- tap_salesforce/salesforce/bulk.py | 4 ++-- tap_salesforce/salesforce/rest.py | 2 +- tap_salesforce/sync.py | 3 +-- 6 files changed, 12 insertions(+), 16 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 053939ae..d378ab40 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -12,7 +12,7 @@ jobs: source ~/.virtualenvs/tap-salesforce/bin/activate pip install . pip install pylint - pylint tap_salesforce -d missing-docstring,invalid-name,line-too-long,too-many-locals,too-few-public-methods,fixme,stop-iteration-return + pylint tap_salesforce -d missing-docstring,invalid-name,line-too-long,too-many-locals,too-few-public-methods,fixme,stop-iteration-return,no-else-return - run: name: 'Unit Tests' command: | diff --git a/tap_salesforce/__init__.py b/tap_salesforce/__init__.py index eb09483a..5515d8cd 100644 --- a/tap_salesforce/__init__.py +++ b/tap_salesforce/__init__.py @@ -2,9 +2,8 @@ import json import sys import singer -import singer.metrics as metrics import singer.utils as singer_utils -from singer import metadata +from singer import metadata, metrics import tap_salesforce.salesforce from tap_salesforce.sync import (sync_stream, resume_syncing_bulk_query, get_stream_version) @@ -102,8 +101,7 @@ def do_discover(sf): """Describes a Salesforce instance's objects and generates a JSON schema for each field.""" global_description = sf.describe() - objects_to_discover = set([o['name'] - for o in global_description['sobjects']]) + objects_to_discover = {o['name'] for o in global_description['sobjects']} key_properties = ['Id'] sf_custom_setting_objects = [] @@ -177,7 +175,7 @@ def do_discover(sf): # There are cases where compound fields are referenced by the associated # subfields but are not actually present in the field list - field_name_set = set([f['name'] for f in fields]) + field_name_set = {f['name'] for f in fields]} filtered_unsupported_fields = [f for f in unsupported_fields if f[0] in field_name_set] missing_unsupported_field_names = [f[0] for f in unsupported_fields if f[0] not in field_name_set] @@ -246,8 +244,8 @@ def do_discover(sf): for f in sf_custom_setting_objects if f in object_to_tag_references] if unsupported_tag_objects: LOGGER.info( - "Skipping the following Tag objects, Tags on Custom Settings Salesforce objects " + - "are not supported by the Bulk API:") + ("Skipping the following Tag objects, Tags on Custom Settings Salesforce objects " + + "are not supported by the Bulk API:")) LOGGER.info(unsupported_tag_objects) entries = [e for e in entries if e['stream'] not in unsupported_tag_objects] diff --git a/tap_salesforce/salesforce/__init__.py b/tap_salesforce/salesforce/__init__.py index dca51e0f..6f7d4ffa 100644 --- a/tap_salesforce/salesforce/__init__.py +++ b/tap_salesforce/salesforce/__init__.py @@ -5,9 +5,8 @@ import requests from requests.exceptions import RequestException import singer -import singer.metrics as metrics import singer.utils as singer_utils -from singer import metadata +from singer import metadata, metrics from tap_salesforce.salesforce.bulk import Bulk from tap_salesforce.salesforce.rest import Rest @@ -185,7 +184,7 @@ def field_to_property_schema(field, mdata): return property_schema, mdata -class Salesforce(object): +class Salesforce(): # pylint: disable=too-many-instance-attributes,too-many-arguments def __init__(self, refresh_token=None, diff --git a/tap_salesforce/salesforce/bulk.py b/tap_salesforce/salesforce/bulk.py index 24781701..cae5ba1f 100644 --- a/tap_salesforce/salesforce/bulk.py +++ b/tap_salesforce/salesforce/bulk.py @@ -5,7 +5,7 @@ import time import tempfile import singer -import singer.metrics as metrics +from singer import metrics import xmltodict @@ -36,7 +36,7 @@ def find_parent(stream): return parent_stream -class Bulk(object): +class Bulk(): bulk_url = "{}/services/async/41.0/{}" diff --git a/tap_salesforce/salesforce/rest.py b/tap_salesforce/salesforce/rest.py index 55f19330..3f6aed21 100644 --- a/tap_salesforce/salesforce/rest.py +++ b/tap_salesforce/salesforce/rest.py @@ -8,7 +8,7 @@ MAX_RETRIES = 4 -class Rest(object): +class Rest(): def __init__(self, sf): self.sf = sf diff --git a/tap_salesforce/sync.py b/tap_salesforce/sync.py index 8678b2f9..d95d285b 100644 --- a/tap_salesforce/sync.py +++ b/tap_salesforce/sync.py @@ -1,8 +1,7 @@ import time import singer -import singer.metrics as metrics import singer.utils as singer_utils -from singer import Transformer, metadata +from singer import Transformer, metadata, metrics from requests.exceptions import RequestException from tap_salesforce.salesforce.bulk import Bulk From 00da5b5f3135c3d780be5c04a8133337655140c8 Mon Sep 17 00:00:00 2001 From: Dan Mosora Date: Mon, 17 Sep 2018 17:29:34 +0000 Subject: [PATCH 3/5] Fix syntax error --- tap_salesforce/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_salesforce/__init__.py b/tap_salesforce/__init__.py index 5515d8cd..3c42f25e 100644 --- a/tap_salesforce/__init__.py +++ b/tap_salesforce/__init__.py @@ -175,7 +175,7 @@ def do_discover(sf): # There are cases where compound fields are referenced by the associated # subfields but are not actually present in the field list - field_name_set = {f['name'] for f in fields]} + field_name_set = {f['name'] for f in fields} filtered_unsupported_fields = [f for f in unsupported_fields if f[0] in field_name_set] missing_unsupported_field_names = [f[0] for f in unsupported_fields if f[0] not in field_name_set] From 68d19d6e8332c00adeaae6854c512f9b90265528 Mon Sep 17 00:00:00 2001 From: Dan Mosora Date: Mon, 17 Sep 2018 18:08:52 +0000 Subject: [PATCH 4/5] More pylint --- tap_salesforce/__init__.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tap_salesforce/__init__.py b/tap_salesforce/__init__.py index 3c42f25e..6dba6886 100644 --- a/tap_salesforce/__init__.py +++ b/tap_salesforce/__init__.py @@ -243,10 +243,8 @@ def do_discover(sf): unsupported_tag_objects = [object_to_tag_references[f] for f in sf_custom_setting_objects if f in object_to_tag_references] if unsupported_tag_objects: - LOGGER.info( - ("Skipping the following Tag objects, Tags on Custom Settings Salesforce objects " + - "are not supported by the Bulk API:")) - LOGGER.info(unsupported_tag_objects) + LOGGER.info("Skipping the following Tag objects, Tags on Custom Settings Salesforce objects " + + "are not supported by the Bulk API:\n%s", unsupported_tag_objects) entries = [e for e in entries if e['stream'] not in unsupported_tag_objects] From 34491e21bb533bcff59946db0f5409bb0f873b50 Mon Sep 17 00:00:00 2001 From: Dan Mosora Date: Mon, 17 Sep 2018 18:12:47 +0000 Subject: [PATCH 5/5] Pylint --- tap_salesforce/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tap_salesforce/__init__.py b/tap_salesforce/__init__.py index 6dba6886..dbe55884 100644 --- a/tap_salesforce/__init__.py +++ b/tap_salesforce/__init__.py @@ -243,8 +243,10 @@ def do_discover(sf): unsupported_tag_objects = [object_to_tag_references[f] for f in sf_custom_setting_objects if f in object_to_tag_references] if unsupported_tag_objects: - LOGGER.info("Skipping the following Tag objects, Tags on Custom Settings Salesforce objects " + - "are not supported by the Bulk API:\n%s", unsupported_tag_objects) + LOGGER.info( #pylint:disable=logging-not-lazy + "Skipping the following Tag objects, Tags on Custom Settings Salesforce objects " + + "are not supported by the Bulk API:") + LOGGER.info(unsupported_tag_objects) entries = [e for e in entries if e['stream'] not in unsupported_tag_objects]