From cb576f8c0fab02ce621de67f4bc922400773be2d Mon Sep 17 00:00:00 2001 From: "alex.marquardt@airbyte.io" Date: Fri, 14 Oct 2022 18:00:39 +0200 Subject: [PATCH 01/14] Added new "filters" python file, along with a "hash" filter. This can be extended to include other custom filters in the future. --- .../declarative/interpolation/filters.py | 29 +++++++++++++++++++ .../declarative/interpolation/jinja.py | 2 ++ 2 files changed, 31 insertions(+) create mode 100644 airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py new file mode 100644 index 0000000000000..8b840a5af5949 --- /dev/null +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py @@ -0,0 +1,29 @@ +import hashlib + +def hash(value, hash_type="md5", salt=None): + """ + Example filter providing custom Jinja2 filter - hash + + Hash type defaults to 'sha1' if one is not specified + + :param value: value to be hashed + :param hash_type: valid hash type + :return: computed hash as a hexadecimal string + """ + hash_obj = getattr(hashlib, hash_type, None)() + hash_obj.update(str(value).encode("utf-8")) + hash_obj.update(str(salt).encode("utf-8")) + + + if hash_obj: + computed_hash = hash_obj.hexdigest() + else: + raise AttributeError( + "No hashing function named {hname}".format(hname=hash_type) + ) + + return computed_hash + + +_filters_list = [hash] +filters = {f.__name__: f for f in _filters_list} diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/jinja.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/jinja.py index 883118fb5fdfc..e5f27c8076c98 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/jinja.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/jinja.py @@ -7,6 +7,7 @@ from airbyte_cdk.sources.declarative.interpolation.interpolation import Interpolation from airbyte_cdk.sources.declarative.interpolation.macros import macros +from airbyte_cdk.sources.declarative.interpolation.filters import filters from airbyte_cdk.sources.declarative.types import Config from jinja2 import Environment from jinja2.exceptions import UndefinedError @@ -32,6 +33,7 @@ class JinjaInterpolation(Interpolation): def __init__(self): self._environment = Environment() + self._environment.filters.update(**filters) self._environment.globals.update(**macros) def eval(self, input_str: str, config: Config, default: Optional[str] = None, **additional_options): From 470a14c05e46528f46d04411e830974741022fd5 Mon Sep 17 00:00:00 2001 From: "alex.marquardt@airbyte.io" Date: Fri, 14 Oct 2022 18:20:53 +0200 Subject: [PATCH 02/14] Added additional comments --- .../declarative/interpolation/filters.py | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py index 8b840a5af5949..e6ba4cde99ac8 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py @@ -2,12 +2,34 @@ def hash(value, hash_type="md5", salt=None): """ - Example filter providing custom Jinja2 filter - hash + Implementation of a custom Jinja2 hash filter + Hash type defaults to 'md5' if one is not specified. + + If you are using this has function for GDPR compliance, then + you should probably also pass in a salt as discussed in: + https://security.stackexchange.com/questions/202022/hashing-email-addresses-for-gdpr-compliance + + This can be used in a low code connector definition under the AddFields transformation. + For example: + + rates_stream: + $ref: "*ref(definitions.base_stream)" + $options: + name: "rates" + primary_key: "date" + path: "/exchangerates_data/latest" + transformations: + - type: AddFields + fields: + - path: ["some_new_path"] + value: "{{ record['rates']['CAD'] | hash('md5', 'mysalt') }}" + - Hash type defaults to 'sha1' if one is not specified :param value: value to be hashed :param hash_type: valid hash type + :param salt: a salt that will be combined with the value to ensure that the hash created for a given value on this system + is different from the hash created for that value on other systems. :return: computed hash as a hexadecimal string """ hash_obj = getattr(hashlib, hash_type, None)() From b9793523d2e3f2a1f5180d86d14cc47fee00cfe7 Mon Sep 17 00:00:00 2001 From: "alex.marquardt@airbyte.io" Date: Sat, 15 Oct 2022 18:00:44 +0200 Subject: [PATCH 03/14] Moved usage of the hash_obj inside the conditional that confirms it exists --- .../airbyte_cdk/sources/declarative/interpolation/filters.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py index e6ba4cde99ac8..3c7f37381eae3 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py @@ -33,11 +33,10 @@ def hash(value, hash_type="md5", salt=None): :return: computed hash as a hexadecimal string """ hash_obj = getattr(hashlib, hash_type, None)() - hash_obj.update(str(value).encode("utf-8")) - hash_obj.update(str(salt).encode("utf-8")) - if hash_obj: + hash_obj.update(str(value).encode("utf-8")) + hash_obj.update(str(salt).encode("utf-8")) computed_hash = hash_obj.hexdigest() else: raise AttributeError( From 139db62ccd1d6f5efceec748ebd887cc717d2007 Mon Sep 17 00:00:00 2001 From: "alex.marquardt@airbyte.io" Date: Sat, 15 Oct 2022 18:05:08 +0200 Subject: [PATCH 04/14] Moved the hash function call inside a condition to ensure that it exists --- .../airbyte_cdk/sources/declarative/interpolation/filters.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py index 3c7f37381eae3..dd6bd1d66310f 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py @@ -32,9 +32,10 @@ def hash(value, hash_type="md5", salt=None): is different from the hash created for that value on other systems. :return: computed hash as a hexadecimal string """ - hash_obj = getattr(hashlib, hash_type, None)() + hash_func = getattr(hashlib, hash_type, None) - if hash_obj: + if hash_func: + hash_obj = hash_func() hash_obj.update(str(value).encode("utf-8")) hash_obj.update(str(salt).encode("utf-8")) computed_hash = hash_obj.hexdigest() From 2359a512a9e69cc507f8c4b2e6feb09df0f2b8a1 Mon Sep 17 00:00:00 2001 From: "alex.marquardt@airbyte.io" Date: Sat, 15 Oct 2022 18:27:06 +0200 Subject: [PATCH 05/14] Fixed the application of the salt , so that it does not modify the hash unless it is actually passed in. --- .../airbyte_cdk/sources/declarative/interpolation/filters.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py index dd6bd1d66310f..0e0d2ada5551b 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py @@ -37,7 +37,8 @@ def hash(value, hash_type="md5", salt=None): if hash_func: hash_obj = hash_func() hash_obj.update(str(value).encode("utf-8")) - hash_obj.update(str(salt).encode("utf-8")) + if salt: + hash_obj.update(str(salt).encode("utf-8")) computed_hash = hash_obj.hexdigest() else: raise AttributeError( From dabe91ea747f63893bbe4545e2472e0f6cb88842 Mon Sep 17 00:00:00 2001 From: "alex.marquardt@airbyte.io" Date: Mon, 17 Oct 2022 13:54:46 +0200 Subject: [PATCH 06/14] Added unit tests to validate new jinja hash functionality --- .../declarative/interpolation/test_filters.py | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_filters.py diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_filters.py b/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_filters.py new file mode 100644 index 0000000000000..22792652defe4 --- /dev/null +++ b/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_filters.py @@ -0,0 +1,45 @@ +import pytest +import hashlib + +from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation + +interpolation = JinjaInterpolation() + + +def test_hash_md5_no_salt(): + input_string = 'abcd' + s = "{{ '%s' | hash('md5') }}"% input_string + filter_hash = interpolation.eval(s, config={}) + + # compute expected hash calling hashlib directly + hash_obj = hashlib.md5() + hash_obj.update(str(input_string).encode("utf-8")) + hashlib_computed_hash = hash_obj.hexdigest() + + assert filter_hash == hashlib_computed_hash + +def test_hash_md5_on_numeric_value(): + input_value = 123.456 + s = "{{ '%s' | hash('md5') }}" % input_value + filter_hash = interpolation.eval(s, config={}) + + # compute expected hash calling hashlib directly + hash_obj = hashlib.md5() + hash_obj.update(str(input_value).encode("utf-8")) + hashlib_computed_hash = hash_obj.hexdigest() + + assert filter_hash == hashlib_computed_hash + +def test_hash_md5_with_salt(): + input_string = 'test_input_string' + input_salt = 'test_input_salt' + + s = "{{ '%s' | hash('md5', '%s' ) }}" % (input_string, input_salt) + filter_hash = interpolation.eval(s, config={}) + + # compute expected value calling hashlib directly + hash_obj = hashlib.md5() + hash_obj.update(str(input_string + input_salt).encode("utf-8")) + hashlib_computed_hash = hash_obj.hexdigest() + + assert filter_hash == hashlib_computed_hash From 983bb85cf792d6d56e72efd9069ac30643908a83 Mon Sep 17 00:00:00 2001 From: "alex.marquardt@airbyte.io" Date: Mon, 17 Oct 2022 14:44:27 +0200 Subject: [PATCH 07/14] Updated unit test to pass numeric value as a float instead of string --- .../sources/declarative/interpolation/test_filters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_filters.py b/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_filters.py index 22792652defe4..5593a96ba2592 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_filters.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_filters.py @@ -20,7 +20,7 @@ def test_hash_md5_no_salt(): def test_hash_md5_on_numeric_value(): input_value = 123.456 - s = "{{ '%s' | hash('md5') }}" % input_value + s = "{{ %f | hash('md5') }}" % input_value filter_hash = interpolation.eval(s, config={}) # compute expected hash calling hashlib directly From 8fb64e5c87201290e4e5303286e4ce41459ccbc9 Mon Sep 17 00:00:00 2001 From: "alex.marquardt@airbyte.io" Date: Mon, 17 Oct 2022 18:22:17 +0200 Subject: [PATCH 08/14] Removed unreferenced import to pytest --- .../sources/declarative/interpolation/test_filters.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_filters.py b/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_filters.py index 5593a96ba2592..b8b6ab09b9ec1 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_filters.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_filters.py @@ -1,6 +1,4 @@ -import pytest import hashlib - from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation interpolation = JinjaInterpolation() From 293630529ffae8cbe65c380eaf68bcef80f54b93 Mon Sep 17 00:00:00 2001 From: "alex.marquardt@airbyte.io" Date: Mon, 17 Oct 2022 21:48:09 +0200 Subject: [PATCH 09/14] Updated version --- airbyte-cdk/python/CHANGELOG.md | 4 ++++ airbyte-cdk/python/setup.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/airbyte-cdk/python/CHANGELOG.md b/airbyte-cdk/python/CHANGELOG.md index 24aa5cca36997..56888cc426d69 100644 --- a/airbyte-cdk/python/CHANGELOG.md +++ b/airbyte-cdk/python/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 0.1.102 + +- Low-code: added hash filter to jinja template + ## 0.1.101 - Low-code: $options do not overwrite parameters that are already set diff --git a/airbyte-cdk/python/setup.py b/airbyte-cdk/python/setup.py index addef435c91fc..ef278c81b2f18 100644 --- a/airbyte-cdk/python/setup.py +++ b/airbyte-cdk/python/setup.py @@ -15,7 +15,7 @@ setup( name="airbyte-cdk", - version="0.1.101", + version="0.1.102", description="A framework for writing Airbyte Connectors.", long_description=README, long_description_content_type="text/markdown", From 8b5fb646ea8a83684e40eda91eae93e62b61e7ef Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Mon, 17 Oct 2022 17:41:56 -0700 Subject: [PATCH 10/14] format --- .../declarative/interpolation/test_filters.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_filters.py b/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_filters.py index b8b6ab09b9ec1..72423342121ca 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_filters.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_filters.py @@ -1,12 +1,16 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# import hashlib + from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation interpolation = JinjaInterpolation() def test_hash_md5_no_salt(): - input_string = 'abcd' - s = "{{ '%s' | hash('md5') }}"% input_string + input_string = "abcd" + s = "{{ '%s' | hash('md5') }}" % input_string filter_hash = interpolation.eval(s, config={}) # compute expected hash calling hashlib directly @@ -16,6 +20,7 @@ def test_hash_md5_no_salt(): assert filter_hash == hashlib_computed_hash + def test_hash_md5_on_numeric_value(): input_value = 123.456 s = "{{ %f | hash('md5') }}" % input_value @@ -28,9 +33,10 @@ def test_hash_md5_on_numeric_value(): assert filter_hash == hashlib_computed_hash + def test_hash_md5_with_salt(): - input_string = 'test_input_string' - input_salt = 'test_input_salt' + input_string = "test_input_string" + input_salt = "test_input_salt" s = "{{ '%s' | hash('md5', '%s' ) }}" % (input_string, input_salt) filter_hash = interpolation.eval(s, config={}) From f5c332e75260b64f4b823f09c666e9567b4e7afe Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Mon, 17 Oct 2022 18:15:03 -0700 Subject: [PATCH 11/14] format --- .../declarative/interpolation/filters.py | 66 ++++++++++--------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py index 0e0d2ada5551b..a0fdc01d5f067 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py @@ -1,36 +1,40 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# import hashlib + def hash(value, hash_type="md5", salt=None): """ - Implementation of a custom Jinja2 hash filter - Hash type defaults to 'md5' if one is not specified. - - If you are using this has function for GDPR compliance, then - you should probably also pass in a salt as discussed in: - https://security.stackexchange.com/questions/202022/hashing-email-addresses-for-gdpr-compliance - - This can be used in a low code connector definition under the AddFields transformation. - For example: - - rates_stream: - $ref: "*ref(definitions.base_stream)" - $options: - name: "rates" - primary_key: "date" - path: "/exchangerates_data/latest" - transformations: - - type: AddFields - fields: - - path: ["some_new_path"] - value: "{{ record['rates']['CAD'] | hash('md5', 'mysalt') }}" - - - - :param value: value to be hashed - :param hash_type: valid hash type - :param salt: a salt that will be combined with the value to ensure that the hash created for a given value on this system - is different from the hash created for that value on other systems. - :return: computed hash as a hexadecimal string + Implementation of a custom Jinja2 hash filter + Hash type defaults to 'md5' if one is not specified. + + If you are using this has function for GDPR compliance, then + you should probably also pass in a salt as discussed in: + https://security.stackexchange.com/questions/202022/hashing-email-addresses-for-gdpr-compliance + + This can be used in a low code connector definition under the AddFields transformation. + For example: + + rates_stream: + $ref: "*ref(definitions.base_stream)" + $options: + name: "rates" + primary_key: "date" + path: "/exchangerates_data/latest" + transformations: + - type: AddFields + fields: + - path: ["some_new_path"] + value: "{{ record['rates']['CAD'] | hash('md5', 'mysalt') }}" + + + + :param value: value to be hashed + :param hash_type: valid hash type + :param salt: a salt that will be combined with the value to ensure that the hash created for a given value on this system + is different from the hash created for that value on other systems. + :return: computed hash as a hexadecimal string """ hash_func = getattr(hashlib, hash_type, None) @@ -41,9 +45,7 @@ def hash(value, hash_type="md5", salt=None): hash_obj.update(str(salt).encode("utf-8")) computed_hash = hash_obj.hexdigest() else: - raise AttributeError( - "No hashing function named {hname}".format(hname=hash_type) - ) + raise AttributeError("No hashing function named {hname}".format(hname=hash_type)) return computed_hash From b2b86d95c0bc9c9187603bc784b315a1f57abf97 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Mon, 17 Oct 2022 18:19:19 -0700 Subject: [PATCH 12/14] format --- .../airbyte_cdk/sources/declarative/interpolation/jinja.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/jinja.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/jinja.py index e5f27c8076c98..7fac960fcb27a 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/jinja.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/jinja.py @@ -5,9 +5,9 @@ import ast from typing import Optional +from airbyte_cdk.sources.declarative.interpolation.filters import filters from airbyte_cdk.sources.declarative.interpolation.interpolation import Interpolation from airbyte_cdk.sources.declarative.interpolation.macros import macros -from airbyte_cdk.sources.declarative.interpolation.filters import filters from airbyte_cdk.sources.declarative.types import Config from jinja2 import Environment from jinja2.exceptions import UndefinedError From 1055f693ed1d3acf782b4a186b4d682472371f9e Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Mon, 17 Oct 2022 18:48:29 -0700 Subject: [PATCH 13/14] format --- .../airbyte_cdk/sources/declarative/interpolation/filters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py index a0fdc01d5f067..f707f42469e71 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/filters.py @@ -1,6 +1,7 @@ # # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # + import hashlib From addfb063cb903bac0f3d5f56217d2fc1abc16de7 Mon Sep 17 00:00:00 2001 From: Alexandre Girard Date: Mon, 17 Oct 2022 20:00:31 -0700 Subject: [PATCH 14/14] format --- .../unit_tests/sources/declarative/interpolation/test_filters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_filters.py b/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_filters.py index 72423342121ca..96dc423f91daa 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_filters.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_filters.py @@ -1,6 +1,7 @@ # # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # + import hashlib from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation