From 930e0111ec275c7479c8448bb1e5aa9c49d8ced9 Mon Sep 17 00:00:00 2001 From: Pat Kearns Date: Fri, 25 Mar 2022 10:29:17 +1100 Subject: [PATCH 1/9] add quoting to split_part --- .../macros/assert_equal_values.sql | 32 +++++++++++ .../test_split_part_quoting.sql | 53 +++++++++++++++++++ macros/cross_db_utils/split_part.sql | 36 +++++++++---- 3 files changed, 111 insertions(+), 10 deletions(-) create mode 100644 integration_tests/macros/assert_equal_values.sql create mode 100644 integration_tests/tests/cross_db_utils/test_split_part_quoting.sql diff --git a/integration_tests/macros/assert_equal_values.sql b/integration_tests/macros/assert_equal_values.sql new file mode 100644 index 00000000..84780e56 --- /dev/null +++ b/integration_tests/macros/assert_equal_values.sql @@ -0,0 +1,32 @@ +{% macro assert_equal_values(actual_object, expected_object) %} +{% if not execute %} + + {# pass #} + +{% elif actual_object != expected_object %} + + {% set msg %} + Expected did not match actual + + ----------- + Actual: + ----------- + --->{{ actual_object }}<--- + + ----------- + Expected: + ----------- + --->{{ expected_object }}<--- + + {% endset %} + + {{ log(msg, info=True) }} + + select 'fail' + +{% else %} + + select 'ok' limit 0 + +{% endif %} +{% endmacro %} \ No newline at end of file diff --git a/integration_tests/tests/cross_db_utils/test_split_part_quoting.sql b/integration_tests/tests/cross_db_utils/test_split_part_quoting.sql new file mode 100644 index 00000000..47944f56 --- /dev/null +++ b/integration_tests/tests/cross_db_utils/test_split_part_quoting.sql @@ -0,0 +1,53 @@ +{%- set singles = 'some string, plus, other stuff' -%} +{%- set doubles = "some string, plus, other stuff" -%} +{%- set double_doubles = "'some string, plus, other stuff'" -%} + +{% set actual_output %} + + select + {{ dbt_utils.split_part(string_text=singles, delimiter_text=',', part_number=1, quote_string_text=False, quote_delimiter_text=False) }} as bad_col1, + {{ dbt_utils.split_part(string_text=doubles, delimiter_text=",", part_number=1, quote_string_text=False, quote_delimiter_text=False) }} as bad_col2, + {{ dbt_utils.split_part(string_text=double_doubles, delimiter_text="','", part_number=1, quote_string_text=False, quote_delimiter_text=False) }} as good_col3 + {{ dbt_utils.split_part(string_text=singles, delimiter_text=',', part_number=1, quote_string_text=True, quote_delimiter_text=True) }} as good_col1, + {{ dbt_utils.split_part(string_text=doubles, delimiter_text=",", part_number=1, quote_string_text=True, quote_delimiter_text=True) }} as good_col2, + {{ dbt_utils.split_part(string_text=double_doubles, delimiter_text="','", part_number=1, quote_string_text=True, quote_delimiter_text=True) }} as bad_col3 + +{% endset %} + +{% set expected_output %} + + select + split_part( + some string, plus, other stuff, + ,, + 1 + ) as bad_col1, + split_part( + some string, plus, other stuff, + ,, + 1 + ) as bad_col2, + split_part( + 'some string, plus, other stuff', + ',', + 1 + ) as good_col3 + split_part( + 'some string, plus, other stuff', + ',', + 1 + ) as good_col1, + split_part( + 'some string, plus, other stuff', + ',', + 1 + ) as good_col2, + split_part( + ''some string, plus, other stuff'', + '','', + 1 + ) as bad_col3 + +{% endset %} + +{{ assert_equal_values (actual_output | trim, expected_output | trim) }} diff --git a/macros/cross_db_utils/split_part.sql b/macros/cross_db_utils/split_part.sql index 036f7d5a..e5d03693 100644 --- a/macros/cross_db_utils/split_part.sql +++ b/macros/cross_db_utils/split_part.sql @@ -1,24 +1,40 @@ -{% macro split_part(string_text, delimiter_text, part_number) %} - {{ return(adapter.dispatch('split_part', 'dbt_utils') (string_text, delimiter_text, part_number)) }} +{% macro split_part(string_text, delimiter_text, part_number, quote_string_text=False, quote_delimiter_text=False) %} + {{ return(adapter.dispatch('split_part', 'dbt_utils') (string_text, delimiter_text, part_number, quote_string_text, quote_delimiter_text)) }} {% endmacro %} -{% macro default__split_part(string_text, delimiter_text, part_number) %} +{% macro default__split_part(string_text, delimiter_text, part_number, quote_string_text=False, quote_delimiter_text=False) -%} split_part( - {{ string_text }}, - {{ delimiter_text }}, + {% if not quote_string_text -%} + {{ string_text }}, + {%- else -%} + '{{ string_text }}', + {%- endif %} + {% if not quote_delimiter_text -%} + {{ delimiter_text }}, + {%- else -%} + '{{ delimiter_text }}', + {%- endif %} {{ part_number }} ) -{% endmacro %} +{%- endmacro %} -{% macro bigquery__split_part(string_text, delimiter_text, part_number) %} +{% macro bigquery__split_part(string_text, delimiter_text, part_number, quote_string_text=False, quote_delimiter_text=False) -%} split( - {{ string_text }}, - {{ delimiter_text }} + {% if not quote_string_text -%} + {{ string_text }}, + {%- else -%} + '{{ string_text }}', + {%- endif %} + {% if not quote_delimiter_text -%} + {{ delimiter_text }} + {%- else -%} + '{{ delimiter_text }}' + {%- endif %} )[safe_offset({{ part_number - 1 }})] -{% endmacro %} +{%- endmacro %} From 49a852d916dbf6f0b2c4c60a22e0e07e4eff2c4e Mon Sep 17 00:00:00 2001 From: Pat Kearns Date: Fri, 25 Mar 2022 11:05:28 +1100 Subject: [PATCH 2/9] update docs for split_part --- README.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 114c12c4..19c1608e 100644 --- a/README.md +++ b/README.md @@ -965,9 +965,22 @@ This macro calculates the difference between two dates. #### split_part ([source](macros/cross_db_utils/split_part.sql)) This macro splits a string of text using the supplied delimiter and returns the supplied part number (1-indexed). +_part(string_text, delimiter_text, part_number, quote_string_text=False, quote_delimiter_text=False) %} + +**Args**: +- `string_text` (required): Text to be split into parts. +- `delimiter_text` (required): Text representing the delimiter to split by. +- `part_number` (required): Requested part of the split (1-based). If the value is negative, the parts are counted backward from the end of the string. +- `quote_string_text` (optional, default=`False`): Normally `string_text` does not need extra quotes, because it is expected to be a column reference and those shouldn't get quotes wrapped around them inside the macro definition. +- `quote_delimiter_text` (optional, default=`False`): If you are quoting from a column, then no quotes needed, but if you input `','` set this to `quote_delimiter_text=True`, to avoid having to input `"','"` + +the delimiter text is more of a pain - i wouldn't be averse to that being quoted inside of the macro so that you only had to single quote it going in. With that said, it could again be that you're trying to split based on a column's value in which case you dont want extra quotes showing up + **Usage:** ``` -{{ dbt_utils.split_part(string_text='1,2,3', delimiter_text=',', part_number=1) }} +{{ dbt_utils.split_part(string_text=some_column, delimiter_text="','", part_number=1) }} +{{ dbt_utils.split_part(string_text=some_column, delimiter_text=',', part_number=1, quote_delimiter_text=True) }} +{{ dbt_utils.split_part(string_text='1,2,3', delimiter_text=',', part_number=1, quote_string_text=True, quote_delimiter_text=True) }} ``` #### date_trunc ([source](macros/cross_db_utils/date_trunc.sql)) From ab0700413f1f618a13245c49ae0c6a031d45e901 Mon Sep 17 00:00:00 2001 From: Pat Kearns Date: Fri, 25 Mar 2022 11:10:30 +1100 Subject: [PATCH 3/9] typo --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 19c1608e..648d7f21 100644 --- a/README.md +++ b/README.md @@ -965,8 +965,6 @@ This macro calculates the difference between two dates. #### split_part ([source](macros/cross_db_utils/split_part.sql)) This macro splits a string of text using the supplied delimiter and returns the supplied part number (1-indexed). -_part(string_text, delimiter_text, part_number, quote_string_text=False, quote_delimiter_text=False) %} - **Args**: - `string_text` (required): Text to be split into parts. - `delimiter_text` (required): Text representing the delimiter to split by. From 89f7da0d64b4e022e608584567dcdc186a09fa17 Mon Sep 17 00:00:00 2001 From: Pat Kearns Date: Fri, 25 Mar 2022 11:36:14 +1100 Subject: [PATCH 4/9] corrected readme syntax --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 648d7f21..0cf89127 100644 --- a/README.md +++ b/README.md @@ -976,8 +976,9 @@ the delimiter text is more of a pain - i wouldn't be averse to that being quoted **Usage:** ``` -{{ dbt_utils.split_part(string_text=some_column, delimiter_text="','", part_number=1) }} -{{ dbt_utils.split_part(string_text=some_column, delimiter_text=',', part_number=1, quote_delimiter_text=True) }} +{{ dbt_utils.split_part(string_text='some_column', delimiter_text="','", part_number=1) }} +{{ dbt_utils.split_part(string_text='some_column', delimiter_text=',', part_number=1, quote_delimiter_text=True) }} +{{ dbt_utils.split_part(string_text=some_column, delimiter_text=',', part_number=1, quote_string_text=True, quote_delimiter_text=True) }} {{ dbt_utils.split_part(string_text='1,2,3', delimiter_text=',', part_number=1, quote_string_text=True, quote_delimiter_text=True) }} ``` From ff69b3701be35fc43fff4ad957486d793dc88c0c Mon Sep 17 00:00:00 2001 From: Pat Kearns Date: Sat, 26 Mar 2022 12:27:04 +1100 Subject: [PATCH 5/9] revert and update to just documentation --- README.md | 8 +-- .../macros/assert_equal_values.sql | 32 ----------- .../test_split_part_quoting.sql | 53 ------------------- macros/cross_db_utils/split_part.sql | 36 ++++--------- 4 files changed, 11 insertions(+), 118 deletions(-) delete mode 100644 integration_tests/macros/assert_equal_values.sql delete mode 100644 integration_tests/tests/cross_db_utils/test_split_part_quoting.sql diff --git a/README.md b/README.md index 0cf89127..0d837ec5 100644 --- a/README.md +++ b/README.md @@ -969,17 +969,11 @@ This macro splits a string of text using the supplied delimiter and returns the - `string_text` (required): Text to be split into parts. - `delimiter_text` (required): Text representing the delimiter to split by. - `part_number` (required): Requested part of the split (1-based). If the value is negative, the parts are counted backward from the end of the string. -- `quote_string_text` (optional, default=`False`): Normally `string_text` does not need extra quotes, because it is expected to be a column reference and those shouldn't get quotes wrapped around them inside the macro definition. -- `quote_delimiter_text` (optional, default=`False`): If you are quoting from a column, then no quotes needed, but if you input `','` set this to `quote_delimiter_text=True`, to avoid having to input `"','"` - -the delimiter text is more of a pain - i wouldn't be averse to that being quoted inside of the macro so that you only had to single quote it going in. With that said, it could again be that you're trying to split based on a column's value in which case you dont want extra quotes showing up **Usage:** ``` {{ dbt_utils.split_part(string_text='some_column', delimiter_text="','", part_number=1) }} -{{ dbt_utils.split_part(string_text='some_column', delimiter_text=',', part_number=1, quote_delimiter_text=True) }} -{{ dbt_utils.split_part(string_text=some_column, delimiter_text=',', part_number=1, quote_string_text=True, quote_delimiter_text=True) }} -{{ dbt_utils.split_part(string_text='1,2,3', delimiter_text=',', part_number=1, quote_string_text=True, quote_delimiter_text=True) }} +{{ dbt_utils.split_part(string_text='1,2,3', delimiter_text="'|'", part_number=1) }} ``` #### date_trunc ([source](macros/cross_db_utils/date_trunc.sql)) diff --git a/integration_tests/macros/assert_equal_values.sql b/integration_tests/macros/assert_equal_values.sql deleted file mode 100644 index 84780e56..00000000 --- a/integration_tests/macros/assert_equal_values.sql +++ /dev/null @@ -1,32 +0,0 @@ -{% macro assert_equal_values(actual_object, expected_object) %} -{% if not execute %} - - {# pass #} - -{% elif actual_object != expected_object %} - - {% set msg %} - Expected did not match actual - - ----------- - Actual: - ----------- - --->{{ actual_object }}<--- - - ----------- - Expected: - ----------- - --->{{ expected_object }}<--- - - {% endset %} - - {{ log(msg, info=True) }} - - select 'fail' - -{% else %} - - select 'ok' limit 0 - -{% endif %} -{% endmacro %} \ No newline at end of file diff --git a/integration_tests/tests/cross_db_utils/test_split_part_quoting.sql b/integration_tests/tests/cross_db_utils/test_split_part_quoting.sql deleted file mode 100644 index 47944f56..00000000 --- a/integration_tests/tests/cross_db_utils/test_split_part_quoting.sql +++ /dev/null @@ -1,53 +0,0 @@ -{%- set singles = 'some string, plus, other stuff' -%} -{%- set doubles = "some string, plus, other stuff" -%} -{%- set double_doubles = "'some string, plus, other stuff'" -%} - -{% set actual_output %} - - select - {{ dbt_utils.split_part(string_text=singles, delimiter_text=',', part_number=1, quote_string_text=False, quote_delimiter_text=False) }} as bad_col1, - {{ dbt_utils.split_part(string_text=doubles, delimiter_text=",", part_number=1, quote_string_text=False, quote_delimiter_text=False) }} as bad_col2, - {{ dbt_utils.split_part(string_text=double_doubles, delimiter_text="','", part_number=1, quote_string_text=False, quote_delimiter_text=False) }} as good_col3 - {{ dbt_utils.split_part(string_text=singles, delimiter_text=',', part_number=1, quote_string_text=True, quote_delimiter_text=True) }} as good_col1, - {{ dbt_utils.split_part(string_text=doubles, delimiter_text=",", part_number=1, quote_string_text=True, quote_delimiter_text=True) }} as good_col2, - {{ dbt_utils.split_part(string_text=double_doubles, delimiter_text="','", part_number=1, quote_string_text=True, quote_delimiter_text=True) }} as bad_col3 - -{% endset %} - -{% set expected_output %} - - select - split_part( - some string, plus, other stuff, - ,, - 1 - ) as bad_col1, - split_part( - some string, plus, other stuff, - ,, - 1 - ) as bad_col2, - split_part( - 'some string, plus, other stuff', - ',', - 1 - ) as good_col3 - split_part( - 'some string, plus, other stuff', - ',', - 1 - ) as good_col1, - split_part( - 'some string, plus, other stuff', - ',', - 1 - ) as good_col2, - split_part( - ''some string, plus, other stuff'', - '','', - 1 - ) as bad_col3 - -{% endset %} - -{{ assert_equal_values (actual_output | trim, expected_output | trim) }} diff --git a/macros/cross_db_utils/split_part.sql b/macros/cross_db_utils/split_part.sql index e5d03693..02ebad0b 100644 --- a/macros/cross_db_utils/split_part.sql +++ b/macros/cross_db_utils/split_part.sql @@ -1,40 +1,24 @@ -{% macro split_part(string_text, delimiter_text, part_number, quote_string_text=False, quote_delimiter_text=False) %} - {{ return(adapter.dispatch('split_part', 'dbt_utils') (string_text, delimiter_text, part_number, quote_string_text, quote_delimiter_text)) }} +{% macro split_part(string_text, delimiter_text, part_number) %} + {{ return(adapter.dispatch('split_part', 'dbt_utils') (string_text, delimiter_text, part_number)) }} {% endmacro %} -{% macro default__split_part(string_text, delimiter_text, part_number, quote_string_text=False, quote_delimiter_text=False) -%} +{% macro default__split_part(string_text, delimiter_text, part_number) %} split_part( - {% if not quote_string_text -%} - {{ string_text }}, - {%- else -%} - '{{ string_text }}', - {%- endif %} - {% if not quote_delimiter_text -%} - {{ delimiter_text }}, - {%- else -%} - '{{ delimiter_text }}', - {%- endif %} + {{ string_text }}, + {{ delimiter_text }}, {{ part_number }} ) -{%- endmacro %} +{% endmacro %} -{% macro bigquery__split_part(string_text, delimiter_text, part_number, quote_string_text=False, quote_delimiter_text=False) -%} +{% macro bigquery__split_part(string_text, delimiter_text, part_number) %} split( - {% if not quote_string_text -%} - {{ string_text }}, - {%- else -%} - '{{ string_text }}', - {%- endif %} - {% if not quote_delimiter_text -%} - {{ delimiter_text }} - {%- else -%} - '{{ delimiter_text }}' - {%- endif %} + {{ string_text }}, + {{ delimiter_text }} )[safe_offset({{ part_number - 1 }})] -{%- endmacro %} +{% endmacro %} \ No newline at end of file From 174b0ca7b829ffe98f6f0532742502beb62275f6 Mon Sep 17 00:00:00 2001 From: Pat Kearns Date: Sat, 26 Mar 2022 12:28:53 +1100 Subject: [PATCH 6/9] add new line --- macros/cross_db_utils/split_part.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/cross_db_utils/split_part.sql b/macros/cross_db_utils/split_part.sql index 02ebad0b..036f7d5a 100644 --- a/macros/cross_db_utils/split_part.sql +++ b/macros/cross_db_utils/split_part.sql @@ -21,4 +21,4 @@ {{ delimiter_text }} )[safe_offset({{ part_number - 1 }})] -{% endmacro %} \ No newline at end of file +{% endmacro %} From 6eec96298a0a0b499dee9482bf04624e37528e6a Mon Sep 17 00:00:00 2001 From: Joel Labes Date: Mon, 28 Mar 2022 15:27:28 +1300 Subject: [PATCH 7/9] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0d837ec5..00ab91c1 100644 --- a/README.md +++ b/README.md @@ -973,7 +973,7 @@ This macro splits a string of text using the supplied delimiter and returns the **Usage:** ``` {{ dbt_utils.split_part(string_text='some_column', delimiter_text="','", part_number=1) }} -{{ dbt_utils.split_part(string_text='1,2,3', delimiter_text="'|'", part_number=1) }} +{{ dbt_utils.split_part(string_text="'1|2|3'", delimiter_text="'|'", part_number=1) }} ``` #### date_trunc ([source](macros/cross_db_utils/date_trunc.sql)) From 065b491a88060c011c10441d60b6674141076ec9 Mon Sep 17 00:00:00 2001 From: Joel Labes Date: Mon, 28 Mar 2022 15:27:33 +1300 Subject: [PATCH 8/9] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 00ab91c1..9c194e44 100644 --- a/README.md +++ b/README.md @@ -972,7 +972,8 @@ This macro splits a string of text using the supplied delimiter and returns the **Usage:** ``` -{{ dbt_utils.split_part(string_text='some_column', delimiter_text="','", part_number=1) }} +When referencing a column, use one pair of quotes. When referencing a string, use single quotes enclosed in double quotes. +{{ dbt_utils.split_part(string_text='column_to_split', delimiter_text='delimiter_column', part_number=1) }} {{ dbt_utils.split_part(string_text="'1|2|3'", delimiter_text="'|'", part_number=1) }} ``` From 9a14bf6891241d1e4f55f4f91b9ee70da02a1121 Mon Sep 17 00:00:00 2001 From: Joel Labes Date: Mon, 28 Mar 2022 15:28:16 +1300 Subject: [PATCH 9/9] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9c194e44..c012c4b3 100644 --- a/README.md +++ b/README.md @@ -971,8 +971,8 @@ This macro splits a string of text using the supplied delimiter and returns the - `part_number` (required): Requested part of the split (1-based). If the value is negative, the parts are counted backward from the end of the string. **Usage:** -``` When referencing a column, use one pair of quotes. When referencing a string, use single quotes enclosed in double quotes. +``` {{ dbt_utils.split_part(string_text='column_to_split', delimiter_text='delimiter_column', part_number=1) }} {{ dbt_utils.split_part(string_text="'1|2|3'", delimiter_text="'|'", part_number=1) }} ```