From 160f6b96270605a4d17b527d9ddcb4879a296d05 Mon Sep 17 00:00:00 2001 From: Chris Goddard Date: Wed, 20 Feb 2019 16:50:53 -0800 Subject: [PATCH 1/6] Support filtering of nested fields Update filter_data_by_metadata function to allow filtering of nested fields - e.g. if property `address` has selected set to True, but property `address.street` has selected set to False, only the street would be excluded. Processes data recursively. --- singer/transform.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/singer/transform.py b/singer/transform.py index 82888c2..0ead203 100644 --- a/singer/transform.py +++ b/singer/transform.py @@ -35,6 +35,13 @@ def unix_seconds_to_datetime(value): return strftime(datetime.datetime.fromtimestamp(int(value), datetime.timezone.utc)) +def breadcrumb_path(breadcrumb): + name = ".".join(breadcrumb) + name = name.replace('properties.', '') + name = name.replace('.items', '[]') + return name + + class SchemaMismatch(Exception): def __init__(self, errors): if not errors: @@ -101,21 +108,23 @@ def __enter__(self): def __exit__(self, *args): self.log_warning() - def filter_data_by_metadata(self, data, metadata): + def filter_data_by_metadata(self, data, metadata, parent=()): if isinstance(data, dict) and metadata: - for field_name in list(data.keys()): - selected = singer.metadata.get(metadata, ('properties', field_name), 'selected') - inclusion = singer.metadata.get(metadata, ('properties', field_name), 'inclusion') + for field_name, field_data in data.items(): + breadcrumb = parent + ('properties', field_name) + selected = singer.metadata.get(metadata, breadcrumb, 'selected') + inclusion = singer.metadata.get(metadata, breadcrumb, 'inclusion') if inclusion == 'automatic': continue - if selected is False: - data.pop(field_name, None) - self.filtered.add(field_name) + if (selected is False) or (inclusion == 'unsupported'): + data[field_name] = None + 
self.filtered.add(breadcrumb_path(breadcrumb)) + + data[field_name] = self.filter_data_by_metadata(field_data, metadata, breadcrumb) - if inclusion == 'unsupported': - data.pop(field_name, None) - self.filtered.add(field_name) + if isinstance(data, list) and metadata: + data = [self.filter_data_by_metadata(d, metadata, parent) for d in data] return data From c76e52f94885e221359ab5b42061fe7b1fb2b888 Mon Sep 17 00:00:00 2001 From: Chris Goddard Date: Wed, 20 Feb 2019 16:52:32 -0800 Subject: [PATCH 2/6] Update transform.py make formatting a little clearer --- singer/transform.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/singer/transform.py b/singer/transform.py index 0ead203..e156034 100644 --- a/singer/transform.py +++ b/singer/transform.py @@ -124,7 +124,8 @@ def filter_data_by_metadata(self, data, metadata, parent=()): data[field_name] = self.filter_data_by_metadata(field_data, metadata, breadcrumb) if isinstance(data, list) and metadata: - data = [self.filter_data_by_metadata(d, metadata, parent) for d in data] + breadcrumb = parent + ('items', field_name) + data = [self.filter_data_by_metadata(d, metadata, breadcrumb) for d in data] return data From 68a8a82659ad295eed18de48d4bf0e0f37a94a46 Mon Sep 17 00:00:00 2001 From: Chris Goddard Date: Wed, 20 Feb 2019 16:54:41 -0800 Subject: [PATCH 3/6] Update transform.py Fix array type breadcrumb name --- singer/transform.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/singer/transform.py b/singer/transform.py index e156034..1747966 100644 --- a/singer/transform.py +++ b/singer/transform.py @@ -124,8 +124,8 @@ def filter_data_by_metadata(self, data, metadata, parent=()): data[field_name] = self.filter_data_by_metadata(field_data, metadata, breadcrumb) if isinstance(data, list) and metadata: - breadcrumb = parent + ('items', field_name) - data = [self.filter_data_by_metadata(d, metadata, breadcrumb) for d in data] + breadcrumb = parent + ('items',) + data = 
[self.filter_data_by_metadata(d, metadata, parent + ('items', )) for d in data] return data From 3547c58b0e898f6d21c5fc9cb43b7dec35a6ad24 Mon Sep 17 00:00:00 2001 From: Chris Goddard Date: Wed, 20 Feb 2019 16:55:29 -0800 Subject: [PATCH 4/6] Update transform.py breadcrumb path documentation --- singer/transform.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/singer/transform.py b/singer/transform.py index 1747966..49a7989 100644 --- a/singer/transform.py +++ b/singer/transform.py @@ -36,6 +36,9 @@ def unix_seconds_to_datetime(value): def breadcrumb_path(breadcrumb): + """ + Transform breadcrumb into familiar object dot-notation + """ name = ".".join(breadcrumb) name = name.replace('properties.', '') name = name.replace('.items', '[]') From 9b7dd7837020af3fdb863d9d66e0c9a264a464ec Mon Sep 17 00:00:00 2001 From: Chris Goddard Date: Wed, 20 Feb 2019 17:16:35 -0800 Subject: [PATCH 5/6] Update transform.py change based on tests - must remove field from data object, not just set value to None. 
--- singer/transform.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/singer/transform.py b/singer/transform.py index 49a7989..23bd33a 100644 --- a/singer/transform.py +++ b/singer/transform.py @@ -113,7 +113,7 @@ def __exit__(self, *args): def filter_data_by_metadata(self, data, metadata, parent=()): if isinstance(data, dict) and metadata: - for field_name, field_data in data.items(): + for field_name in list(data.keys()): breadcrumb = parent + ('properties', field_name) selected = singer.metadata.get(metadata, breadcrumb, 'selected') inclusion = singer.metadata.get(metadata, breadcrumb, 'inclusion') @@ -121,10 +121,10 @@ def filter_data_by_metadata(self, data, metadata, parent=()): continue if (selected is False) or (inclusion == 'unsupported'): - data[field_name] = None + data.pop(field_name, None) self.filtered.add(breadcrumb_path(breadcrumb)) - - data[field_name] = self.filter_data_by_metadata(field_data, metadata, breadcrumb) + else: + data[field_name] = self.filter_data_by_metadata(data[field_name], metadata, breadcrumb) if isinstance(data, list) and metadata: breadcrumb = parent + ('items',) From acbe4847087d5f9061440fd6cebf75c88d875c83 Mon Sep 17 00:00:00 2001 From: Chris Goddard Date: Wed, 20 Feb 2019 17:35:19 -0800 Subject: [PATCH 6/6] Update transform.py line length :) --- singer/transform.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/singer/transform.py b/singer/transform.py index 23bd33a..49dd00e 100644 --- a/singer/transform.py +++ b/singer/transform.py @@ -124,7 +124,8 @@ def filter_data_by_metadata(self, data, metadata, parent=()): data.pop(field_name, None) self.filtered.add(breadcrumb_path(breadcrumb)) else: - data[field_name] = self.filter_data_by_metadata(data[field_name], metadata, breadcrumb) + data[field_name] = self.filter_data_by_metadata( + data[field_name], metadata, breadcrumb) if isinstance(data, list) and metadata: breadcrumb = parent + ('items',)