From 80e72c59cb3a166d69e5a32b67aca18026c87878 Mon Sep 17 00:00:00 2001 From: Joe Markiewicz <74217849+fivetran-joemarkiewicz@users.noreply.github.com> Date: Mon, 13 Dec 2021 20:27:00 -0600 Subject: [PATCH 01/25] Fix/timestamp withought timezone (#458) * timestamp and changelog updates * changelog fix * Add context for why change to no timezone Co-authored-by: Joel Labes --- .github/pull_request_template.md | 2 +- CHANGELOG.md | 3 +++ macros/cross_db_utils/datatypes.sql | 4 ++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 7d8278dd..3a381a66 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,5 +1,5 @@ This is a: -- [ ] bug fix PR with no breaking changes — please ensure the base branch is `master` +- [ ] bug fix PR with no breaking changes — please ensure the base branch is `main` - [ ] new functionality — please ensure the base branch is the latest `dev/` branch - [ ] a breaking change — please ensure the base branch is the latest `dev/` branch diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ad5e3e0..b64c27c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +# dbt-utils (next version) +## Fixes +- `type_timestamp` macro now explicitly casts postgres and redshift warehouse timestamp data types as `timestamp without time zone`, to be consistent with Snowflake behaviour (`timestamp_ntz`). # dbt-utils v0.8.0 ## 🚨 Breaking changes - dbt ONE POINT OH is here! This version of dbt-utils requires _any_ version (minor and patch) of v1, which means far less need for compatibility releases in the future. diff --git a/macros/cross_db_utils/datatypes.sql b/macros/cross_db_utils/datatypes.sql index 3dc532b8..f115b4e2 100644 --- a/macros/cross_db_utils/datatypes.sql +++ b/macros/cross_db_utils/datatypes.sql @@ -32,6 +32,10 @@ timestamp {% endmacro %} +{% macro postgres__type_timestamp() %} + timestamp without time zone +{% endmacro %} + {% macro snowflake__type_timestamp() %} timestamp_ntz {% endmacro %} From e195076810e861f48cf446f2dfe20070a6189a5a Mon Sep 17 00:00:00 2001 From: Anders Date: Wed, 5 Jan 2022 20:15:45 -0800 Subject: [PATCH 02/25] also ignore dbt_packages (#463) * also ignore dbt_packages * Update CHANGELOG.md * Update CHANGELOG.md Co-authored-by: Joel Labes --- .gitignore | 3 ++- CHANGELOG.md | 7 ++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index e409c400..49ca155a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ target/ dbt_modules/ +dbt_packages/ logs/ -venv/ \ No newline at end of file +venv/ diff --git a/CHANGELOG.md b/CHANGELOG.md index b64c27c4..8399441a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ -# dbt-utils (next version) +# dbt-utils v0.8.1 + +## Under the hood +- also ignore `dbt_packages/` directory [#463](https://github.com/dbt-labs/dbt-utils/pull/463) + ## Fixes - `type_timestamp` macro now explicitly casts postgres and redshift warehouse timestamp data types as `timestamp without time zone`, to be consistent with Snowflake behaviour (`timestamp_ntz`). + # dbt-utils v0.8.0 ## 🚨 Breaking changes - dbt ONE POINT OH is here! This version of dbt-utils requires _any_ version (minor and patch) of v1, which means far less need for compatibility releases in the future. 
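
Patch 01 above dispatches `type_timestamp()` per adapter so that Postgres and Redshift cast to `timestamp without time zone` while Snowflake keeps `timestamp_ntz`. A minimal usage sketch of the macro in a downstream model, assuming a hypothetical `orders` model with an `order_placed_at` column (neither is part of the patch):

```sql
-- Hypothetical dbt model; on Postgres this compiles to
-- cast(order_placed_at as timestamp without time zone),
-- on Snowflake to cast(order_placed_at as timestamp_ntz).
select
    order_id,
    cast(order_placed_at as {{ dbt_utils.type_timestamp() }}) as order_placed_at
from {{ ref('orders') }}
```
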
From 58b872604655e81f10b9cf7f21dddba850aafd28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mika=C3=ABl=20Simarik?= Date: Thu, 6 Jan 2022 05:35:29 +0100 Subject: [PATCH 03/25] date_spine: transform comment to jinja (#462) --- CHANGELOG.md | 3 ++- macros/sql/date_spine.sql | 7 +++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8399441a..b1861c41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,8 @@ ## 🚨 Breaking changes - dbt ONE POINT OH is here! This version of dbt-utils requires _any_ version (minor and patch) of v1, which means far less need for compatibility releases in the future. - The partition column in the `mutually_exclusive_ranges` test is now always called `partition_by_col`. This enables compatibility with `--store-failures` when multiple columns are concatenated together. If you have models built on top of the failures table, update them to reflect the new column name. ([#423](https://github.com/dbt-labs/dbt-utils/issues/423), [#430](https://github.com/dbt-labs/dbt-utils/pull/430)) - +## Under the hood +- make date_spine macro compatible with the Athena connector (#462) ## Contributors: - [codigo-ergo-sum](https://github.com/codigo-ergo-sum) (#430) diff --git a/macros/sql/date_spine.sql b/macros/sql/date_spine.sql index 6998bd2e..759f8d59 100644 --- a/macros/sql/date_spine.sql +++ b/macros/sql/date_spine.sql @@ -29,16 +29,15 @@ {% macro default__date_spine(datepart, start_date, end_date) %} -/* -call as follows: + +{# call as follows: date_spine( "day", "to_date('01/01/2016', 'mm/dd/yyyy')", "dateadd(week, 1, current_date)" -) +) #} -*/ with rawdata as ( From 900365a7b964578bb0dd4ac0834df0a97b932d41 Mon Sep 17 00:00:00 2001 From: Graham Wetzler Date: Mon, 24 Jan 2022 14:21:01 -0600 Subject: [PATCH 04/25] Have union_relations raise exception when include parameter results in no columns (#473) * Raise exception if no columns in column_superset * Add relation names to compiler error message * Add `union_relations` fix to changelog --- CHANGELOG.md | 6 +++++- macros/sql/union.sql | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b1861c41..59145411 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,11 @@ - also ignore `dbt_packages/` directory [#463](https://github.com/dbt-labs/dbt-utils/pull/463) ## Fixes -- `type_timestamp` macro now explicitly casts postgres and redshift warehouse timestamp data types as `timestamp without time zone`, to be consistent with Snowflake behaviour (`timestamp_ntz`). +- `type_timestamp` macro now explicitly casts postgres and redshift warehouse timestamp data types as `timestamp without time zone`, to be consistent with Snowflake behaviour (`timestamp_ntz`). +- `union_relations` macro will now raise an exception if the use of `include` or `exclude` results in no columns ([#473](https://github.com/dbt-labs/dbt-utils/pull/473), [#266](https://github.com/dbt-labs/dbt-utils/issues/266)). 
+ +## Contributors: +- [grahamwetzler](https://github.com/grahamwetzler) (#473) # dbt-utils v0.8.0 ## 🚨 Breaking changes diff --git a/macros/sql/union.sql b/macros/sql/union.sql index 13bafe31..009a765a 100644 --- a/macros/sql/union.sql +++ b/macros/sql/union.sql @@ -61,6 +61,21 @@ {%- set ordered_column_names = column_superset.keys() -%} + {%- if not column_superset.keys() -%} + {%- set relations_string -%} + {%- for relation in relations -%} + {{ relation.name }} + {%- if not loop.last %}, {% endif -%} + {%- endfor -%} + {%- endset -%} + + {%- set error_message -%} + There were no columns found to union for relations {{ relations_string }} + {%- endset -%} + + {{ exceptions.raise_compiler_error(error_message) }} + {%- endif -%} + {%- for relation in relations %} ( From ae65d05e333b93be9c268bd4e7196e4b22d10f07 Mon Sep 17 00:00:00 2001 From: Taras <32882370+Aesthet@users.noreply.github.com> Date: Tue, 1 Feb 2022 05:15:11 +0200 Subject: [PATCH 05/25] Added case for handling postgres foreign tables... (#476) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add link for fewer_rows_than schema test in docs (#465) * Added case for handling postgres foreign tables (tables which are external to current database and are imported into current database from remote data stores by using Foreign Data Wrappers functionallity). * Reworked getting of postges table_type. * Added needed changes to CHANGELOG. Co-authored-by: José Coto Co-authored-by: Taras Stetsiak --- CHANGELOG.md | 6 ++++++ macros/sql/get_table_types_sql.sql | 22 ++++++++++++++++++++++ macros/sql/get_tables_by_pattern_sql.sql | 7 +------ 3 files changed, 29 insertions(+), 6 deletions(-) create mode 100644 macros/sql/get_table_types_sql.sql diff --git a/CHANGELOG.md b/CHANGELOG.md index 59145411..f0140657 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,10 +14,16 @@ ## 🚨 Breaking changes - dbt ONE POINT OH is here! This version of dbt-utils requires _any_ version (minor and patch) of v1, which means far less need for compatibility releases in the future. - The partition column in the `mutually_exclusive_ranges` test is now always called `partition_by_col`. This enables compatibility with `--store-failures` when multiple columns are concatenated together. If you have models built on top of the failures table, update them to reflect the new column name. ([#423](https://github.com/dbt-labs/dbt-utils/issues/423), [#430](https://github.com/dbt-labs/dbt-utils/pull/430)) + +## Fixes +- `get_relations_by_pattern()` now uses additional sub macros `get_table_types_sql()` to determine table types for different database engines. ([#357](https://github.com/dbt-labs/dbt-utils/issues/357), [#476](https://github.com/dbt-labs/dbt-utils/pull/476)) + ## Under the hood - make date_spine macro compatible with the Athena connector (#462) + ## Contributors: - [codigo-ergo-sum](https://github.com/codigo-ergo-sum) (#430) +- [Aesthet](https://github.com/Aesthet) (#476) # dbt-utils 0.7.5 🚨 This is a compatibility release in preparation for `dbt-core` v1.0.0 (🎉). Projects using dbt-utils 0.7.4 with dbt-core v1.0.0 can expect to see a deprecation warning. This will be resolved in dbt_utils v0.8.0. 
diff --git a/macros/sql/get_table_types_sql.sql b/macros/sql/get_table_types_sql.sql new file mode 100644 index 00000000..e3f86884 --- /dev/null +++ b/macros/sql/get_table_types_sql.sql @@ -0,0 +1,22 @@ +{%- macro get_table_types_sql() -%} + {{ return(adapter.dispatch('get_table_types_sql', 'dbt_utils')()) }} +{%- endmacro -%} + +{% macro default__get_table_types_sql() %} + case table_type + when 'BASE TABLE' then 'table' + when 'EXTERNAL TABLE' then 'external' + when 'MATERIALIZED VIEW' then 'materializedview' + else lower(table_type) + end as "table_type" +{% endmacro %} + + +{% macro postgres__get_table_types_sql() %} + case table_type + when 'BASE TABLE' then 'table' + when 'FOREIGN' then 'external' + when 'MATERIALIZED VIEW' then 'materializedview' + else lower(table_type) + end as "table_type" +{% endmacro %} diff --git a/macros/sql/get_tables_by_pattern_sql.sql b/macros/sql/get_tables_by_pattern_sql.sql index 9185875c..93f3c6a6 100644 --- a/macros/sql/get_tables_by_pattern_sql.sql +++ b/macros/sql/get_tables_by_pattern_sql.sql @@ -8,12 +8,7 @@ select distinct table_schema as "table_schema", table_name as "table_name", - case table_type - when 'BASE TABLE' then 'table' - when 'EXTERNAL TABLE' then 'external' - when 'MATERIALIZED VIEW' then 'materializedview' - else lower(table_type) - end as "table_type" + {{ dbt_utils.get_table_types_sql() }} from {{ database }}.information_schema.tables where table_schema ilike '{{ schema_pattern }}' and table_name ilike '{{ table_pattern }}' From c863448b692a4e3fb02332a017a0ef9192e6f3f8 Mon Sep 17 00:00:00 2001 From: nickperrott <46330920+nickperrott@users.noreply.github.com> Date: Thu, 17 Feb 2022 20:15:22 +1300 Subject: [PATCH 06/25] Enhance usability of star macro by only generating column aliases when prefix and/or suffix is specified (#468) * The star macro should only produce column aliases when there is either a prefix or suffix specified. * Enhanced the readme for the star macro. * Add new integration test Co-authored-by: Nick Perrott Co-authored-by: Josh Elston-Green Co-authored-by: Joel Labes --- README.md | 4 +++- .../data/sql/data_star_aggregate.csv | 5 +++++ .../data/sql/data_star_aggregate_expected.csv | 4 ++++ integration_tests/models/sql/schema.yml | 5 +++++ .../models/sql/test_star_aggregate.sql | 16 ++++++++++++++++ macros/sql/star.sql | 2 +- 6 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 integration_tests/data/sql/data_star_aggregate.csv create mode 100644 integration_tests/data/sql/data_star_aggregate_expected.csv create mode 100644 integration_tests/models/sql/test_star_aggregate.sql diff --git a/README.md b/README.md index 70346771..afb8301d 100644 --- a/README.md +++ b/README.md @@ -729,7 +729,9 @@ group by 1,2,3 ``` #### star ([source](macros/sql/star.sql)) -This macro generates a list of all fields that exist in the `from` relation, excluding any fields listed in the `except` argument. The construction is identical to `select * from {{ref('my_model')}}`, replacing star (`*`) with the star macro. This macro also has an optional `relation_alias` argument that will prefix all generated fields with an alias (`relation_alias`.`field_name`). The macro also has optional `prefix` and `suffix` arguments, which will be appropriately concatenated to each field name in the output (`prefix` ~ `field_name` ~ `suffix`). +This macro generates a comma-separated list of all fields that exist in the `from` relation, excluding any fields listed in the `except` argument. 
The construction is identical to `select * from {{ref('my_model')}}`, replacing star (`*`) with the star macro. This macro also has an optional `relation_alias` argument that will prefix all generated fields with an alias (`relation_alias`.`field_name`). + +The macro also has optional `prefix` and `suffix` arguments. When one or both are provided, they will be concatenated onto each field's alias in the output (`prefix` ~ `field_name` ~ `suffix`). NB: This prevents the output from being used in any context other than a select statement. **Usage:** ```sql diff --git a/integration_tests/data/sql/data_star_aggregate.csv b/integration_tests/data/sql/data_star_aggregate.csv new file mode 100644 index 00000000..19d47446 --- /dev/null +++ b/integration_tests/data/sql/data_star_aggregate.csv @@ -0,0 +1,5 @@ +group_field_1,group_field_2,value_field +a,b,1 +a,b,2 +c,d,3 +c,e,4 \ No newline at end of file diff --git a/integration_tests/data/sql/data_star_aggregate_expected.csv b/integration_tests/data/sql/data_star_aggregate_expected.csv new file mode 100644 index 00000000..d7056386 --- /dev/null +++ b/integration_tests/data/sql/data_star_aggregate_expected.csv @@ -0,0 +1,4 @@ +group_field_1,group_field_2,value_field_sum +a,b,3 +c,d,3 +c,e,4 \ No newline at end of file diff --git a/integration_tests/models/sql/schema.yml b/integration_tests/models/sql/schema.yml index 5b341457..e8d11f4e 100644 --- a/integration_tests/models/sql/schema.yml +++ b/integration_tests/models/sql/schema.yml @@ -111,6 +111,11 @@ models: - dbt_utils.equality: compare_model: ref('data_star_prefix_suffix_expected') + - name: test_star_aggregate + tests: + - dbt_utils.equality: + compare_model: ref('data_star_aggregate_expected') + - name: test_surrogate_key tests: - assert_equal: diff --git a/integration_tests/models/sql/test_star_aggregate.sql b/integration_tests/models/sql/test_star_aggregate.sql new file mode 100644 index 00000000..102f0226 --- /dev/null +++ b/integration_tests/models/sql/test_star_aggregate.sql @@ -0,0 +1,16 @@ +{#-/*This test checks that column aliases aren't applied unless there's a prefix/suffix necessary, to ensure that GROUP BYs keep working*/-#} + +{% set selected_columns = dbt_utils.star(from=ref('data_star_aggregate'), except=['value_field']) %} + +with data as ( + + select + {{ selected_columns }}, + sum(value_field) as value_field_sum + + from {{ ref('data_star_aggregate') }} + group by {{ selected_columns }} + +) + +select * from data diff --git a/macros/sql/star.sql b/macros/sql/star.sql index ffe48946..0bfa8c9a 100644 --- a/macros/sql/star.sql +++ b/macros/sql/star.sql @@ -24,7 +24,7 @@ {%- for col in include_cols %} - {%- if relation_alias %}{{ relation_alias }}.{% else %}{%- endif -%}{{ adapter.quote(col)|trim }} as {{ adapter.quote(prefix ~ col ~ suffix)|trim }} + {%- if relation_alias %}{{ relation_alias }}.{% else %}{%- endif -%}{{ adapter.quote(col)|trim }} {%- if prefix!='' or suffix!='' -%} as {{ adapter.quote(prefix ~ col ~ suffix)|trim }} {%- endif -%} {%- if not loop.last %},{{ '\n ' }}{% endif %} {%- endfor -%} From 8b7356b1cb5cf36622f26a2e51a317f3f4c7b6bf Mon Sep 17 00:00:00 2001 From: Ted Conbeer Date: Thu, 17 Feb 2022 19:22:24 -0700 Subject: [PATCH 07/25] fix: extra brace typo in insert_by_period_materialization (#480) --- README.md | 33 +++++++++++++------ .../insert_by_period_materialization.sql | 2 +- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index afb8301d..e73a04a9 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ For 
compatibility details between versions of dbt-core and dbt-utils, [see this **[Schema tests](#schema-tests)** - [equal_rowcount](#equal_rowcount-source) + - [fewer_rows_than](#fewer_rows_than-source) - [equality](#equality-source) - [expression_is_true](#expression_is_true-source) - [recency](#recency-source) @@ -228,6 +229,8 @@ models: #### unique_where ([source](macros/schema_tests/test_unique_where.sql)) This test validates that there are no duplicate values present in a field for a subset of rows by specifying a `where` clause. +*Warning*: This test is no longer supported. Starting in dbt v0.20.0, the built-in `unique` test supports a `where` config. [See the dbt docs for more details](https://docs.getdbt.com/reference/resource-configs/where). + **Usage:** ```yaml version: 2 @@ -244,6 +247,8 @@ models: #### not_null_where ([source](macros/schema_tests/test_not_null_where.sql)) This test validates that there are no null values present in a column for a subset of rows by specifying a `where` clause. +*Warning*: This test is no longer supported. Starting in dbt v0.20.0, the built-in `not_null` test supports a `where` config. [See the dbt docs for more details](https://docs.getdbt.com/reference/resource-configs/where). + **Usage:** ```yaml version: 2 @@ -657,17 +662,25 @@ This macro returns a dictionary from a sql query, so that you don't need to inte **Usage:** ``` --- Returns a dictionary of the users table where the state is California -{% set california_cities = dbt_utils.get_query_results_as_dict("select * from" ~ ref('cities') ~ "where state = 'CA' and city is not null ") %} +{% set sql_statement %} + select city, state from {{ ref('users) }} +{% endset %} + +{%- set places = dbt_utils.get_query_results_as_dict(sql_statement) -%} + select - city, -{% for city in california_cities %} - sum(case when city = {{ city }} then 1 else 0 end) as users_in_{{ city }}, -{% endfor %} - count(*) as total -from {{ ref('users') }} -group by 1 + {% for city in places['CITY'] | unique -%} + sum(case when city = '{{ city }}' then 1 else 0 end) as users_in_{{ dbt_utils.slugify(city) }}, + {% endfor %} + + {% for state in places['STATE'] | unique -%} + sum(case when state = '{{ state }}' then 1 else 0 end) as users_in_{{ state }}, + {% endfor %} + + count(*) as total_total + +from {{ ref('users') }} ``` ### SQL generators @@ -1044,7 +1057,7 @@ select order_id, {%- for payment_method in payment_methods %} sum(case when payment_method = '{{ payment_method }}' then amount end) - as {{ slugify(payment_method) }}_amount, + as {{ dbt_utils.slugify(payment_method) }}_amount, {% endfor %} ... 
diff --git a/macros/materializations/insert_by_period_materialization.sql b/macros/materializations/insert_by_period_materialization.sql index 851afa3d..5a15c815 100644 --- a/macros/materializations/insert_by_period_materialization.sql +++ b/macros/materializations/insert_by_period_materialization.sql @@ -53,7 +53,7 @@ {% materialization insert_by_period, default -%} {%- set timestamp_field = config.require('timestamp_field') -%} {%- set start_date = config.require('start_date') -%} - {%- set stop_date = config.get('stop_date') or '' -%}} + {%- set stop_date = config.get('stop_date') or '' -%} {%- set period = config.get('period') or 'week' -%} {%- if sql.find('__PERIOD_FILTER__') == -1 -%} From 14127aaed63260eb1aa364406a13ead69257d083 Mon Sep 17 00:00:00 2001 From: Armand Duijn Date: Fri, 18 Feb 2022 03:35:21 +0100 Subject: [PATCH 08/25] Support quoted column names in sequential_values test (#479) --- macros/schema_tests/sequential_values.sql | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/macros/schema_tests/sequential_values.sql b/macros/schema_tests/sequential_values.sql index 83eee0c0..8ddae707 100644 --- a/macros/schema_tests/sequential_values.sql +++ b/macros/schema_tests/sequential_values.sql @@ -6,13 +6,15 @@ {% macro default__test_sequential_values(model, column_name, interval=1, datepart=None) %} +{% set previous_column_name = "previous_" ~ dbt_utils.slugify(column_name) %} + with windowed as ( select {{ column_name }}, lag({{ column_name }}) over ( order by {{ column_name }} - ) as previous_{{ column_name }} + ) as {{ previous_column_name }} from {{ model }} ), @@ -21,9 +23,9 @@ validation_errors as ( * from windowed {% if datepart %} - where not(cast({{ column_name }} as {{ dbt_utils.type_timestamp() }})= cast({{ dbt_utils.dateadd(datepart, interval, 'previous_' + column_name) }} as {{ dbt_utils.type_timestamp() }})) + where not(cast({{ column_name }} as {{ dbt_utils.type_timestamp() }})= cast({{ dbt_utils.dateadd(datepart, interval, previous_column_name) }} as {{ dbt_utils.type_timestamp() }})) {% else %} - where not({{ column_name }} = previous_{{ column_name }} + {{ interval }}) + where not({{ column_name }} = {{ previous_column_name }} + {{ interval }}) {% endif %} ) From 46fcdf63f4df0f0748714285d2a36f87aa08e7c0 Mon Sep 17 00:00:00 2001 From: Joel Labes Date: Fri, 18 Feb 2022 15:36:51 +1300 Subject: [PATCH 09/25] Add any value (#501) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add link for fewer_rows_than schema test in docs (#465) * Update get_query_results_as_dict example to demonstrate accessing columnar results as dictionary values (#474) * Update get_qu ery_results_as_dict example to demonstrate accessing columnar results as dictionary values * Use slugify in example * Fix slugify example with dbt_utils. 
package prefix Co-authored-by: Elize Papineau * Add note about not_null_where deprecation to Readme (#477) * Add note about not_null_where deprecation to Readme * Add docs to unique_where test * Update pull_request_template.md to reference `main` vs `master` (#496) * Correct coalesce -> concatenation typo (#495) * add any_value cross-db macro * Missing colon in test * Update CHANGELOG.md Co-authored-by: José Coto Co-authored-by: Elize Papineau Co-authored-by: Elize Papineau Co-authored-by: Joe Ste.Marie Co-authored-by: Niall Woodward --- CHANGELOG.md | 3 +++ README.md | 2 +- .../data/cross_db/data_any_value_expected.csv | 4 ++++ .../models/cross_db_utils/schema.yml | 6 ++++++ .../models/cross_db_utils/test_any_value.sql | 19 +++++++++++++++++++ macros/cross_db_utils/any_value.sql | 17 +++++++++++++++++ 6 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 integration_tests/data/cross_db/data_any_value_expected.csv create mode 100644 integration_tests/models/cross_db_utils/test_any_value.sql create mode 100644 macros/cross_db_utils/any_value.sql diff --git a/CHANGELOG.md b/CHANGELOG.md index f0140657..13a9d880 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # dbt-utils v0.8.1 +## New features +- A cross-database implementation of `any_value()` ([#497](https://github.com/dbt-labs/dbt-utils/issues/497), [#501](https://github.com/dbt-labs/dbt-utils/pull/501)) + ## Under the hood - also ignore `dbt_packages/` directory [#463](https://github.com/dbt-labs/dbt-utils/pull/463) diff --git a/README.md b/README.md index e73a04a9..3aa3d36f 100644 --- a/README.md +++ b/README.md @@ -461,7 +461,7 @@ in isolation. We generally recommend testing this uniqueness condition by either: * generating a [surrogate_key](#surrogate_key-source) for your model and testing the uniqueness of said key, OR -* passing the `unique` test a coalesce of the columns (as discussed [here](https://docs.getdbt.com/docs/building-a-dbt-project/testing-and-documentation/testing/#testing-expressions)). +* passing the `unique` test a concatenation of the columns (as discussed [here](https://docs.getdbt.com/docs/building-a-dbt-project/testing-and-documentation/testing/#testing-expressions)). However, these approaches can become non-perfomant on large data sets, in which case we recommend using this test instead. 
diff --git a/integration_tests/data/cross_db/data_any_value_expected.csv b/integration_tests/data/cross_db/data_any_value_expected.csv new file mode 100644 index 00000000..95e6aefb --- /dev/null +++ b/integration_tests/data/cross_db/data_any_value_expected.csv @@ -0,0 +1,4 @@ +key_name,static_col,num_rows +abc,dbt,2 +jkl,dbt,3 +xyz,test,1 \ No newline at end of file diff --git a/integration_tests/models/cross_db_utils/schema.yml b/integration_tests/models/cross_db_utils/schema.yml index 3f576fce..6de8eaf4 100644 --- a/integration_tests/models/cross_db_utils/schema.yml +++ b/integration_tests/models/cross_db_utils/schema.yml @@ -1,6 +1,12 @@ version: 2 models: + - name: test_any_value + tests: + - dbt_utils.equality: + compare_model: ref('data_any_value_expected') + + - name: test_concat tests: - assert_equal: diff --git a/integration_tests/models/cross_db_utils/test_any_value.sql b/integration_tests/models/cross_db_utils/test_any_value.sql new file mode 100644 index 00000000..9b27ed1d --- /dev/null +++ b/integration_tests/models/cross_db_utils/test_any_value.sql @@ -0,0 +1,19 @@ +with some_model as ( + select 1 as id, 'abc' as key_name, 'dbt' as static_col union all + select 2 as id, 'abc' as key_name, 'dbt' as static_col union all + select 3 as id, 'jkl' as key_name, 'dbt' as static_col union all + select 4 as id, 'jkl' as key_name, 'dbt' as static_col union all + select 5 as id, 'jkl' as key_name, 'dbt' as static_col union all + select 6 as id, 'xyz' as key_name, 'test' as static_col +), + +final as ( + select + key_name, + {{ dbt_utils.any_value('static_col') }} as static_col, + count(id) as num_rows + from some_model + group by key_name +) + +select * from final \ No newline at end of file diff --git a/macros/cross_db_utils/any_value.sql b/macros/cross_db_utils/any_value.sql new file mode 100644 index 00000000..78cb75ba --- /dev/null +++ b/macros/cross_db_utils/any_value.sql @@ -0,0 +1,17 @@ +{% macro any_value(expression) -%} + {{ return(adapter.dispatch('any_value', 'dbt_utils') (expression)) }} +{% endmacro %} + + +{% macro default__any_value(expression) -%} + + any_value({{ expression }}) + +{%- endmacro %} + + +{% macro postgres__any_value(expression) -%} + {#- /*Postgres doesn't support any_value, so we're using min() to get the same result*/ -#} + min({{ expression }}) + +{%- endmacro %} \ No newline at end of file From 58af46e9e3995af268207e7e795131b31e11ddeb Mon Sep 17 00:00:00 2001 From: Joel Labes Date: Fri, 18 Feb 2022 16:01:30 +1300 Subject: [PATCH 10/25] Fix changelog --- CHANGELOG.md | 17 ++++++++++------- .../models/sql/test_star_aggregate.sql | 2 +- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 13a9d880..4d3eb456 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,28 +5,31 @@ ## Under the hood - also ignore `dbt_packages/` directory [#463](https://github.com/dbt-labs/dbt-utils/pull/463) +- Remove block comments to make date_spine macro compatible with the Athena connector ([#462](https://github.com/dbt-labs/dbt-utils/pull/462)) ## Fixes - `type_timestamp` macro now explicitly casts postgres and redshift warehouse timestamp data types as `timestamp without time zone`, to be consistent with Snowflake behaviour (`timestamp_ntz`). - `union_relations` macro will now raise an exception if the use of `include` or `exclude` results in no columns ([#473](https://github.com/dbt-labs/dbt-utils/pull/473), [#266](https://github.com/dbt-labs/dbt-utils/issues/266)). 
+- `get_relations_by_pattern()` works with foreign data wrappers on Postgres again. ([#357](https://github.com/dbt-labs/dbt-utils/issues/357), [#476](https://github.com/dbt-labs/dbt-utils/pull/476)) +- `star()` will only alias columns if a prefix/suffix is provided, to allow the unmodified output to still be used in `group by` clauses etc. [#468](https://github.com/dbt-labs/dbt-utils/pull/468) +- The `sequential_values` test is now compatible with quoted columns [#479](https://github.com/dbt-labs/dbt-utils/pull/479) ## Contributors: - [grahamwetzler](https://github.com/grahamwetzler) (#473) +- [Aesthet](https://github.com/Aesthet) (#476) +- [Kamitenshi](https://github.com/Kamitenshi) (#462) +- [nickperrott](https://github.com/nickperrott) (#468) +- [jelstongreen](https://github.com/jelstongreen) (#468) +- [armandduijn](https://github.com/armandduijn) (#479) + # dbt-utils v0.8.0 ## 🚨 Breaking changes - dbt ONE POINT OH is here! This version of dbt-utils requires _any_ version (minor and patch) of v1, which means far less need for compatibility releases in the future. - The partition column in the `mutually_exclusive_ranges` test is now always called `partition_by_col`. This enables compatibility with `--store-failures` when multiple columns are concatenated together. If you have models built on top of the failures table, update them to reflect the new column name. ([#423](https://github.com/dbt-labs/dbt-utils/issues/423), [#430](https://github.com/dbt-labs/dbt-utils/pull/430)) -## Fixes -- `get_relations_by_pattern()` now uses additional sub macros `get_table_types_sql()` to determine table types for different database engines. ([#357](https://github.com/dbt-labs/dbt-utils/issues/357), [#476](https://github.com/dbt-labs/dbt-utils/pull/476)) - -## Under the hood -- make date_spine macro compatible with the Athena connector (#462) - ## Contributors: - [codigo-ergo-sum](https://github.com/codigo-ergo-sum) (#430) -- [Aesthet](https://github.com/Aesthet) (#476) # dbt-utils 0.7.5 🚨 This is a compatibility release in preparation for `dbt-core` v1.0.0 (🎉). Projects using dbt-utils 0.7.4 with dbt-core v1.0.0 can expect to see a deprecation warning. This will be resolved in dbt_utils v0.8.0. diff --git a/integration_tests/models/sql/test_star_aggregate.sql b/integration_tests/models/sql/test_star_aggregate.sql index 102f0226..9dcd7c2d 100644 --- a/integration_tests/models/sql/test_star_aggregate.sql +++ b/integration_tests/models/sql/test_star_aggregate.sql @@ -1,4 +1,4 @@ -{#-/*This test checks that column aliases aren't applied unless there's a prefix/suffix necessary, to ensure that GROUP BYs keep working*/-#} +/*This test checks that column aliases aren't applied unless there's a prefix/suffix necessary, to ensure that GROUP BYs keep working*/ {% set selected_columns = dbt_utils.star(from=ref('data_star_aggregate'), except=['value_field']) %} From 52433816c91965280c88b2fc8559b1e960b7c035 Mon Sep 17 00:00:00 2001 From: Joel Labes Date: Wed, 23 Feb 2022 09:41:10 +1300 Subject: [PATCH 11/25] Second take at fixing pivot to allow single quotes (#503) * fix pivot : in pivoted column value, single quote must be escaped (on postgresql) else ex. 
syntax error near : when color = 'blue's' * patched expected * single quote escape : added dispatched version of the macro to support bigquery & snowflake * second backslash to escape in Jinja, change case of test file columns Let's see if other databases allow this * explicitly list columns to compare * different tests for snowflake and others * specific comparison seed * Don't quote identifiers for apostrophe, to avoid BQ and SF problems * Whitespace management for macros * Update CHANGELOG.md Co-authored-by: Marc Dutoo --- CHANGELOG.md | 2 ++ integration_tests/data/sql/data_pivot.csv | 3 ++- .../data/sql/data_pivot_expected.csv | 2 +- .../data/sql/data_pivot_expected_apostrophe.csv | 3 +++ integration_tests/dbt_project.yml | 1 - integration_tests/models/sql/schema.yml | 5 +++++ .../models/sql/test_pivot_apostrophe.sql | 17 +++++++++++++++++ macros/cross_db_utils/escape_single_quotes.sql | 15 +++++++++++++++ macros/sql/pivot.sql | 2 +- 9 files changed, 46 insertions(+), 4 deletions(-) create mode 100644 integration_tests/data/sql/data_pivot_expected_apostrophe.csv create mode 100644 integration_tests/models/sql/test_pivot_apostrophe.sql create mode 100644 macros/cross_db_utils/escape_single_quotes.sql diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d3eb456..5e1e26b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ - `get_relations_by_pattern()` works with foreign data wrappers on Postgres again. ([#357](https://github.com/dbt-labs/dbt-utils/issues/357), [#476](https://github.com/dbt-labs/dbt-utils/pull/476)) - `star()` will only alias columns if a prefix/suffix is provided, to allow the unmodified output to still be used in `group by` clauses etc. [#468](https://github.com/dbt-labs/dbt-utils/pull/468) - The `sequential_values` test is now compatible with quoted columns [#479](https://github.com/dbt-labs/dbt-utils/pull/479) +- `pivot()` escapes values containing apostrophes [#503](https://github.com/dbt-labs/dbt-utils/pull/503) ## Contributors: - [grahamwetzler](https://github.com/grahamwetzler) (#473) @@ -21,6 +22,7 @@ - [nickperrott](https://github.com/nickperrott) (#468) - [jelstongreen](https://github.com/jelstongreen) (#468) - [armandduijn](https://github.com/armandduijn) (#479) +- [mdutoo](https://github.com/mdutoo) (#503) # dbt-utils v0.8.0 diff --git a/integration_tests/data/sql/data_pivot.csv b/integration_tests/data/sql/data_pivot.csv index e43cfd90..4725599d 100644 --- a/integration_tests/data/sql/data_pivot.csv +++ b/integration_tests/data/sql/data_pivot.csv @@ -1,4 +1,5 @@ size,color S,red S,blue -M,red +S,blue's +M,red \ No newline at end of file diff --git a/integration_tests/data/sql/data_pivot_expected.csv b/integration_tests/data/sql/data_pivot_expected.csv index ea309371..a7702c78 100644 --- a/integration_tests/data/sql/data_pivot_expected.csv +++ b/integration_tests/data/sql/data_pivot_expected.csv @@ -1,3 +1,3 @@ size,red,blue S,1,1 -M,1,0 +M,1,0 \ No newline at end of file diff --git a/integration_tests/data/sql/data_pivot_expected_apostrophe.csv b/integration_tests/data/sql/data_pivot_expected_apostrophe.csv new file mode 100644 index 00000000..1f403a22 --- /dev/null +++ b/integration_tests/data/sql/data_pivot_expected_apostrophe.csv @@ -0,0 +1,3 @@ +size,red,blue,blues +S,1,1,1 +M,1,0,0 \ No newline at end of file diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index bb6d147b..b4421128 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -60,7 +60,6 @@ seeds: # this.incorporate() 
to hardcode the node's type as otherwise dbt doesn't know it yet +post-hook: "{% do adapter.drop_relation(this.incorporate(type='table')) %}" - schema_tests: data_test_sequential_timestamps: +column_types: diff --git a/integration_tests/models/sql/schema.yml b/integration_tests/models/sql/schema.yml index e8d11f4e..616dd386 100644 --- a/integration_tests/models/sql/schema.yml +++ b/integration_tests/models/sql/schema.yml @@ -85,6 +85,11 @@ models: tests: - dbt_utils.equality: compare_model: ref('data_pivot_expected') + + - name: test_pivot_apostrophe + tests: + - dbt_utils.equality: + compare_model: ref('data_pivot_expected_apostrophe') - name: test_unpivot_original_api tests: diff --git a/integration_tests/models/sql/test_pivot_apostrophe.sql b/integration_tests/models/sql/test_pivot_apostrophe.sql new file mode 100644 index 00000000..792f4686 --- /dev/null +++ b/integration_tests/models/sql/test_pivot_apostrophe.sql @@ -0,0 +1,17 @@ + +-- TODO: How do we make this work nicely on Snowflake too? + +{% if target.type == 'snowflake' %} + {% set column_values = ['RED', 'BLUE', "BLUE'S"] %} + {% set cmp = 'ilike' %} +{% else %} + {% set column_values = ['red', 'blue', "blue's"] %} + {% set cmp = '=' %} +{% endif %} + +select + size, + {{ dbt_utils.pivot('color', column_values, cmp=cmp, quote_identifiers=False) }} + +from {{ ref('data_pivot') }} +group by size diff --git a/macros/cross_db_utils/escape_single_quotes.sql b/macros/cross_db_utils/escape_single_quotes.sql new file mode 100644 index 00000000..ea04ec5b --- /dev/null +++ b/macros/cross_db_utils/escape_single_quotes.sql @@ -0,0 +1,15 @@ +{% macro escape_single_quotes(expression) %} + {{ return(adapter.dispatch('escape_single_quotes', 'dbt_utils') (expression)) }} +{% endmacro %} + +{% macro default__escape_single_quotes(expression) -%} +{{ expression | replace("'","''") }} +{%- endmacro %} + +{% macro snowflake__escape_single_quotes(expression) -%} +{{ expression | replace("'", "\\'") }} +{%- endmacro %} + +{% macro bigquery__escape_single_quotes(expression) -%} +{{ expression | replace("'", "\\'") }} +{%- endmacro %} diff --git a/macros/sql/pivot.sql b/macros/sql/pivot.sql index b8f12ee5..e19e76c0 100644 --- a/macros/sql/pivot.sql +++ b/macros/sql/pivot.sql @@ -69,7 +69,7 @@ Arguments: {{ agg }}( {% if distinct %} distinct {% endif %} case - when {{ column }} {{ cmp }} '{{ v }}' + when {{ column }} {{ cmp }} '{{ dbt_utils.escape_single_quotes(v) }}' then {{ then_value }} else {{ else_value }} end From 2cfe8c0536f085bd697fa34f27c8c7be900faa6a Mon Sep 17 00:00:00 2001 From: Joel Labes Date: Wed, 23 Feb 2022 09:44:28 +1300 Subject: [PATCH 12/25] Add bool or cross db (#504) * Create bool_or cross-db func * Forgot a comma * Update CHANGELOG.md --- CHANGELOG.md | 1 + .../data/cross_db/data_bool_or.csv | 8 +++++++ .../data/cross_db/data_bool_or_expected.csv | 5 ++++ .../models/cross_db_utils/schema.yml | 4 ++++ .../models/cross_db_utils/test_bool_or.sql | 5 ++++ macros/cross_db_utils/bool_or.sql | 24 +++++++++++++++++++ 6 files changed, 47 insertions(+) create mode 100644 integration_tests/data/cross_db/data_bool_or.csv create mode 100644 integration_tests/data/cross_db/data_bool_or_expected.csv create mode 100644 integration_tests/models/cross_db_utils/test_bool_or.sql create mode 100644 macros/cross_db_utils/bool_or.sql diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e1e26b9..67b690e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## New features - A cross-database implementation of `any_value()` 
([#497](https://github.com/dbt-labs/dbt-utils/issues/497), [#501](https://github.com/dbt-labs/dbt-utils/pull/501)) +- A cross-database implementation of `bool_or()` ([#504](https://github.com/dbt-labs/dbt-utils/pull/504)) ## Under the hood - also ignore `dbt_packages/` directory [#463](https://github.com/dbt-labs/dbt-utils/pull/463) diff --git a/integration_tests/data/cross_db/data_bool_or.csv b/integration_tests/data/cross_db/data_bool_or.csv new file mode 100644 index 00000000..97ea2ab5 --- /dev/null +++ b/integration_tests/data/cross_db/data_bool_or.csv @@ -0,0 +1,8 @@ +key,val1,val2 +abc,1,1 +abc,1,0 +def,1,0 +hij,1,1 +hij,1, +klm,1,0 +klm,1, \ No newline at end of file diff --git a/integration_tests/data/cross_db/data_bool_or_expected.csv b/integration_tests/data/cross_db/data_bool_or_expected.csv new file mode 100644 index 00000000..14f6e92d --- /dev/null +++ b/integration_tests/data/cross_db/data_bool_or_expected.csv @@ -0,0 +1,5 @@ +key,value +abc,true +def,false +hij,true +klm,false \ No newline at end of file diff --git a/integration_tests/models/cross_db_utils/schema.yml b/integration_tests/models/cross_db_utils/schema.yml index 6de8eaf4..dbe7a8f4 100644 --- a/integration_tests/models/cross_db_utils/schema.yml +++ b/integration_tests/models/cross_db_utils/schema.yml @@ -6,6 +6,10 @@ models: - dbt_utils.equality: compare_model: ref('data_any_value_expected') + - name: test_bool_or + tests: + - dbt_utils.equality: + compare_model: ref('data_bool_or_expected') - name: test_concat tests: diff --git a/integration_tests/models/cross_db_utils/test_bool_or.sql b/integration_tests/models/cross_db_utils/test_bool_or.sql new file mode 100644 index 00000000..7375d1e4 --- /dev/null +++ b/integration_tests/models/cross_db_utils/test_bool_or.sql @@ -0,0 +1,5 @@ +select + key, + {{ dbt_utils.bool_or('val1 = val2') }} as value +from {{ ref('data_bool_or' )}} +group by key \ No newline at end of file diff --git a/macros/cross_db_utils/bool_or.sql b/macros/cross_db_utils/bool_or.sql new file mode 100644 index 00000000..ce0a6857 --- /dev/null +++ b/macros/cross_db_utils/bool_or.sql @@ -0,0 +1,24 @@ +{% macro bool_or(expression) -%} + {{ return(adapter.dispatch('bool_or', 'dbt_utils') (expression)) }} +{% endmacro %} + + +{% macro default__bool_or(expression) -%} + + bool_or({{ expression }}) + +{%- endmacro %} + + +{% macro snowflake__bool_or(expression) -%} + + boolor_agg({{ expression }}) + +{%- endmacro %} + + +{% macro bigquery__bool_or(expression) -%} + + logical_or({{ expression }}) + +{%- endmacro %} \ No newline at end of file From e4cbf62960f54167d93e49fe176550421b956cf9 Mon Sep 17 00:00:00 2001 From: Joel Labes Date: Wed, 23 Feb 2022 10:15:31 +1300 Subject: [PATCH 13/25] Code review tweaks --- macros/cross_db_utils/escape_single_quotes.sql | 3 +++ macros/sql/pivot.sql | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/macros/cross_db_utils/escape_single_quotes.sql b/macros/cross_db_utils/escape_single_quotes.sql index ea04ec5b..d024f16f 100644 --- a/macros/cross_db_utils/escape_single_quotes.sql +++ b/macros/cross_db_utils/escape_single_quotes.sql @@ -2,14 +2,17 @@ {{ return(adapter.dispatch('escape_single_quotes', 'dbt_utils') (expression)) }} {% endmacro %} +{# /*Default to replacing a single apostrophe with two apostrophes: they're -> they''re*/ #} {% macro default__escape_single_quotes(expression) -%} {{ expression | replace("'","''") }} {%- endmacro %} +{# /*Snowflake uses a single backslash: they're -> they\'re. 
The second backslash is to escape it from Jinja */ #} {% macro snowflake__escape_single_quotes(expression) -%} {{ expression | replace("'", "\\'") }} {%- endmacro %} +{# /*BigQuery uses a single backslash: they're -> they\'re. The second backslash is to escape it from Jinja */ #} {% macro bigquery__escape_single_quotes(expression) -%} {{ expression | replace("'", "\\'") }} {%- endmacro %} diff --git a/macros/sql/pivot.sql b/macros/sql/pivot.sql index e19e76c0..88751062 100644 --- a/macros/sql/pivot.sql +++ b/macros/sql/pivot.sql @@ -65,20 +65,20 @@ Arguments: else_value=0, quote_identifiers=True, distinct=False) %} - {% for v in values %} + {% for value in values %} {{ agg }}( {% if distinct %} distinct {% endif %} case - when {{ column }} {{ cmp }} '{{ dbt_utils.escape_single_quotes(v) }}' + when {{ column }} {{ cmp }} '{{ dbt_utils.escape_single_quotes(value) }}' then {{ then_value }} else {{ else_value }} end ) {% if alias %} {% if quote_identifiers %} - as {{ adapter.quote(prefix ~ v ~ suffix) }} + as {{ adapter.quote(prefix ~ value ~ suffix) }} {% else %} - as {{ dbt_utils.slugify(prefix ~ v ~ suffix) }} + as {{ dbt_utils.slugify(prefix ~ value ~ suffix) }} {% endif %} {% endif %} {% if not loop.last %},{% endif %} From a2b6684b0196f8d8c88db3170cc3c9484ff43f3c Mon Sep 17 00:00:00 2001 From: Joel Labes Date: Thu, 3 Mar 2022 14:14:54 +1300 Subject: [PATCH 14/25] Fix union_relations error when no include/exclude provided (#509) --- macros/sql/union.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/sql/union.sql b/macros/sql/union.sql index 009a765a..a7bf1d95 100644 --- a/macros/sql/union.sql +++ b/macros/sql/union.sql @@ -61,7 +61,7 @@ {%- set ordered_column_names = column_superset.keys() -%} - {%- if not column_superset.keys() -%} + {% if (include | length > 0 or exclude | length > 0) and not column_superset.keys() %} {%- set relations_string -%} {%- for relation in relations -%} {{ relation.name }} From 15627b9e661144faaeb697c5f7698ae4bac23c49 Mon Sep 17 00:00:00 2001 From: Joel Labes Date: Thu, 3 Mar 2022 14:17:54 +1300 Subject: [PATCH 15/25] Update CHANGELOG.md --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 67b690e7..f346c55b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# dbt-utils v0.8.2 +## Fixes +- Fix union_relations error from [#473](https://github.com/dbt-labs/dbt-utils/pull/473) when no include/exclude parameters are provided ([#505](https://github.com/dbt-labs/dbt-utils/issues/505), [#509](https://github.com/dbt-labs/dbt-utils/pull/509)) + # dbt-utils v0.8.1 ## New features From 3e814b2e0a33ddd3c9cf2bddb68b710ff5777e2e Mon Sep 17 00:00:00 2001 From: Elize Papineau Date: Sun, 13 Mar 2022 22:01:44 -0700 Subject: [PATCH 16/25] Add _is_ephemeral test to get_column_values (#518) * Add _is_ephemeral test Co-authored-by: Elize Papineau --- README.md | 100 ++++++++++++++++--------------- macros/sql/get_column_values.sql | 2 + 2 files changed, 55 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index bab3a503..f707a376 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ For compatibility details between versions of dbt-core and dbt-utils, [see this [Materializations](#materializations): - [insert_by_period](#insert_by_period-source) ---- +---- ### Schema Tests #### equal_rowcount ([source](macros/schema_tests/equal_rowcount.sql)) This schema test asserts the that two relations have the same number of rows. 
@@ -310,6 +310,7 @@ models: to: ref('other_model_name') field: client_id from_condition: id <> '4ca448b8-24bf-4b88-96c6-b1609499c38b' + to_condition: created_date >= '2020-01-01' ``` #### mutually_exclusive_ranges ([source](macros/schema_tests/mutually_exclusive_ranges.sql)) @@ -377,53 +378,58 @@ models: partition_by: customer_id gaps: allowed ``` +
+Additional `gaps` and `zero_length_range_allowed` examples + + **Understanding the `gaps` argument:** + + Here are a number of examples for each allowed `gaps` argument. + * `gaps: not_allowed`: The upper bound of one record must be the lower bound of + the next record. + + | lower_bound | upper_bound | + |-------------|-------------| + | 0 | 1 | + | 1 | 2 | + | 2 | 3 | + + * `gaps: allowed` (default): There may be a gap between the upper bound of one + record and the lower bound of the next record. + + | lower_bound | upper_bound | + |-------------|-------------| + | 0 | 1 | + | 2 | 3 | + | 3 | 4 | + + * `gaps: required`: There must be a gap between the upper bound of one record and + the lower bound of the next record (common for date ranges). + + | lower_bound | upper_bound | + |-------------|-------------| + | 0 | 1 | + | 2 | 3 | + | 4 | 5 | + + **Understanding the `zero_length_range_allowed` argument:** + Here are a number of examples for each allowed `zero_length_range_allowed` argument. + * `zero_length_range_allowed: false`: (default) The upper bound of each record must be greater than its lower bound. + + | lower_bound | upper_bound | + |-------------|-------------| + | 0 | 1 | + | 1 | 2 | + | 2 | 3 | + + * `zero_length_range_allowed: true`: The upper bound of each record can be greater than or equal to its lower bound. + + | lower_bound | upper_bound | + |-------------|-------------| + | 0 | 1 | + | 2 | 2 | + | 3 | 4 | -**Understanding the `gaps` argument:** -Here are a number of examples for each allowed `gaps` argument. -* `gaps: not_allowed`: The upper bound of one record must be the lower bound of -the next record. - -| lower_bound | upper_bound | -|-------------|-------------| -| 0 | 1 | -| 1 | 2 | -| 2 | 3 | - -* `gaps: allowed` (default): There may be a gap between the upper bound of one -record and the lower bound of the next record. - -| lower_bound | upper_bound | -|-------------|-------------| -| 0 | 1 | -| 2 | 3 | -| 3 | 4 | - -* `gaps: required`: There must be a gap between the upper bound of one record and -the lower bound of the next record (common for date ranges). - -| lower_bound | upper_bound | -|-------------|-------------| -| 0 | 1 | -| 2 | 3 | -| 4 | 5 | - -**Understanding the `zero_length_range_allowed` argument:** -Here are a number of examples for each allowed `zero_length_range_allowed` argument. -* `zero_length_range_allowed: false`: (default) The upper bound of each record must be greater than its lower bound. - -| lower_bound | upper_bound | -|-------------|-------------| -| 0 | 1 | -| 1 | 2 | -| 2 | 3 | - -* `zero_length_range_allowed: true`: The upper bound of each record can be greater than or equal to its lower bound. - -| lower_bound | upper_bound | -|-------------|-------------| -| 0 | 1 | -| 2 | 2 | -| 3 | 4 | +
#### sequential_values ([source](macros/schema_tests/sequential_values.sql)) This test confirms that a column contains sequential values. It can be used diff --git a/macros/sql/get_column_values.sql b/macros/sql/get_column_values.sql index 57b150a6..2a7c62ee 100644 --- a/macros/sql/get_column_values.sql +++ b/macros/sql/get_column_values.sql @@ -11,6 +11,8 @@ {{ return(default) }} {% endif %} + {%- do dbt_utils._is_ephemeral(table, 'get_column_values') -%} + {# Not all relations are tables. Renaming for internal clarity without breaking functionality for anyone using named arguments #} {# TODO: Change the method signature in a future 0.x.0 release #} {%- set target_relation = table -%} From 3a4cc94ce2dfc654c0d61328985ff6e67e2329e8 Mon Sep 17 00:00:00 2001 From: Judah Rand <17158624+judahrand@users.noreply.github.com> Date: Tue, 22 Mar 2022 00:20:35 +0000 Subject: [PATCH 17/25] Add deduplication macro (#512) * Update README.md * Mutually excl range examples in disclosure triangle * Fix union_relations error when no include/exclude provided * Fix union_relations error when no include/exclude provided (#509) * Update CHANGELOG.md * Add dedupe macro * Add test for dedupe macro * Add documentation to README * Add entry to CHANGELOG * Implement review --- CHANGELOG.md | 6 ++- README.md | 17 ++++++- .../data/sql/data_deduplicate.csv | 3 ++ .../data/sql/data_deduplicate_expected.csv | 2 + integration_tests/models/sql/schema.yml | 9 +++- .../models/sql/test_deduplicate.sql | 7 +++ macros/sql/deduplicate.sql | 46 +++++++++++++++++++ 7 files changed, 86 insertions(+), 4 deletions(-) create mode 100644 integration_tests/data/sql/data_deduplicate.csv create mode 100644 integration_tests/data/sql/data_deduplicate_expected.csv create mode 100644 integration_tests/models/sql/test_deduplicate.sql create mode 100644 macros/sql/deduplicate.sql diff --git a/CHANGELOG.md b/CHANGELOG.md index f346c55b..c9046d7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# dbt-utils v0.8.3 +## New features +- A macro for deduplicating data ([#335](https://github.com/dbt-labs/dbt-utils/issues/335), [#512](https://github.com/dbt-labs/dbt-utils/pull/512)) + # dbt-utils v0.8.2 ## Fixes - Fix union_relations error from [#473](https://github.com/dbt-labs/dbt-utils/pull/473) when no include/exclude parameters are provided ([#505](https://github.com/dbt-labs/dbt-utils/issues/505), [#509](https://github.com/dbt-labs/dbt-utils/pull/509)) @@ -32,7 +36,7 @@ # dbt-utils v0.8.0 ## 🚨 Breaking changes -- dbt ONE POINT OH is here! This version of dbt-utils requires _any_ version (minor and patch) of v1, which means far less need for compatibility releases in the future. +- dbt ONE POINT OH is here! This version of dbt-utils requires _any_ version (minor and patch) of v1, which means far less need for compatibility releases in the future. - The partition column in the `mutually_exclusive_ranges` test is now always called `partition_by_col`. This enables compatibility with `--store-failures` when multiple columns are concatenated together. If you have models built on top of the failures table, update them to reflect the new column name. 
([#423](https://github.com/dbt-labs/dbt-utils/issues/423), [#430](https://github.com/dbt-labs/dbt-utils/pull/430)) ## Contributors: diff --git a/README.md b/README.md index f707a376..ae922a1f 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ For compatibility details between versions of dbt-core and dbt-utils, [see this - [SQL generators](#sql-generators) - [date_spine](#date_spine-source) + - [dedupe](#dedupe-source) - [haversine_distance](#haversine_distance-source) - [group_by](#group_by-source) - [star](#star-source) @@ -706,6 +707,20 @@ This macro returns the sql required to build a date spine. The spine will includ }} ``` +#### deduplicate ([source](macros/sql/deduplicate.sql)) +This macro returns the sql required to remove duplicate rows from a model or source. + +**Usage:** + +``` +{{ dbt_utils.deduplicate( + relation=source('my_source', 'my_table'), + group_by="user_id, cast(timestamp as day)", + order_by="timestamp desc" + ) +}} +``` + #### haversine_distance ([source](macros/sql/haversine_distance.sql)) This macro calculates the [haversine distance](http://daynebatten.com/2015/09/latitude-longitude-distance-sql/) between a pair of x/y coordinates. @@ -748,7 +763,7 @@ group by 1,2,3 ``` #### star ([source](macros/sql/star.sql)) -This macro generates a comma-separated list of all fields that exist in the `from` relation, excluding any fields listed in the `except` argument. The construction is identical to `select * from {{ref('my_model')}}`, replacing star (`*`) with the star macro. This macro also has an optional `relation_alias` argument that will prefix all generated fields with an alias (`relation_alias`.`field_name`). +This macro generates a comma-separated list of all fields that exist in the `from` relation, excluding any fields listed in the `except` argument. The construction is identical to `select * from {{ref('my_model')}}`, replacing star (`*`) with the star macro. This macro also has an optional `relation_alias` argument that will prefix all generated fields with an alias (`relation_alias`.`field_name`). The macro also has optional `prefix` and `suffix` arguments. When one or both are provided, they will be concatenated onto each field's alias in the output (`prefix` ~ `field_name` ~ `suffix`). NB: This prevents the output from being used in any context other than a select statement. 
diff --git a/integration_tests/data/sql/data_deduplicate.csv b/integration_tests/data/sql/data_deduplicate.csv new file mode 100644 index 00000000..c3ae0c4d --- /dev/null +++ b/integration_tests/data/sql/data_deduplicate.csv @@ -0,0 +1,3 @@ +user_id,event,version +1,play,1 +1,play,2 diff --git a/integration_tests/data/sql/data_deduplicate_expected.csv b/integration_tests/data/sql/data_deduplicate_expected.csv new file mode 100644 index 00000000..de5e204d --- /dev/null +++ b/integration_tests/data/sql/data_deduplicate_expected.csv @@ -0,0 +1,2 @@ +user_id,event,version +1,play,2 diff --git a/integration_tests/models/sql/schema.yml b/integration_tests/models/sql/schema.yml index 616dd386..ad990260 100644 --- a/integration_tests/models/sql/schema.yml +++ b/integration_tests/models/sql/schema.yml @@ -85,7 +85,7 @@ models: tests: - dbt_utils.equality: compare_model: ref('data_pivot_expected') - + - name: test_pivot_apostrophe tests: - dbt_utils.equality: @@ -137,8 +137,13 @@ models: tests: - dbt_utils.equality: compare_model: ref('data_union_expected') - + - name: test_get_relations_by_pattern tests: - dbt_utils.equality: compare_model: ref('data_union_events_expected') + + - name: test_dedupe + tests: + - dbt_utils.equality: + compare_model: ref('data_deduplicate_expected') diff --git a/integration_tests/models/sql/test_deduplicate.sql b/integration_tests/models/sql/test_deduplicate.sql new file mode 100644 index 00000000..7df79261 --- /dev/null +++ b/integration_tests/models/sql/test_deduplicate.sql @@ -0,0 +1,7 @@ +with deduped as ( + + {{ dbt_utils.deduplicate(ref('data_deduplicate'), group_by='user_id', order_by='version desc') | indent }} + +) + +select * from deduped diff --git a/macros/sql/deduplicate.sql b/macros/sql/deduplicate.sql new file mode 100644 index 00000000..f5d65534 --- /dev/null +++ b/macros/sql/deduplicate.sql @@ -0,0 +1,46 @@ +{%- macro deduplicate(relation, group_by, order_by=none) -%} + {{ return(adapter.dispatch('deduplicate', 'dbt_utils')(relation, group_by, order_by=order_by)) }} +{% endmacro %} + +{%- macro default__deduplicate(relation, group_by, order_by=none) -%} + + select + {{ dbt_utils.star(relation, relation_alias='deduped') | indent }} + from ( + select + _inner.*, + row_number() over ( + partition by {{ group_by }} + {% if order_by is not none -%} + order by {{ order_by }} + {%- endif %} + ) as rn + from {{ relation }} as _inner + ) as deduped + where deduped.rn = 1 + +{%- endmacro -%} + +{# +-- It is more performant to deduplicate using `array_agg` with a limit +-- clause in BigQuery: +-- https://github.com/dbt-labs/dbt-utils/issues/335#issuecomment-788157572 +#} +{%- macro bigquery__deduplicate(relation, group_by, order_by=none) -%} + + select + {{ dbt_utils.star(relation, relation_alias='deduped') | indent }} + from ( + select + array_agg ( + original + {% if order_by is not none -%} + order by {{ order_by }} + {%- endif %} + limit 1 + )[offset(0)] as deduped + from {{ relation }} as original + group by {{ group_by }} + ) + +{%- endmacro -%} From e2f6ba7b41f5dcdf33ce056c195f899372607128 Mon Sep 17 00:00:00 2001 From: Luis Leon <98919783+luisleon90@users.noreply.github.com> Date: Tue, 22 Mar 2022 23:34:59 -0400 Subject: [PATCH 18/25] Typed materialized views as views (#525) * Typed materialized views as views * Update get_relations_by_pattern.sql * Moving fix from get_tables_by_pattern_sql reverting changes to this file to add a fix to the macro get_tables_by_pattern_sql * removing quoting from table_type removing quoting from table_type as this was 
causing an error when calling this macro within get_tables_by_pattern_sql * calling get_table_types_sql for materialized views calling get_table_types_sql macro to handle materialized views in sources. --- CHANGELOG.md | 1 + README.md | 3 +-- macros/sql/get_relations_by_pattern.sql | 2 +- macros/sql/get_table_types_sql.sql | 2 +- macros/sql/get_tables_by_pattern_sql.sql | 5 +---- 5 files changed, 5 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c9046d7f..5bb4df53 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,4 @@ + # dbt-utils v0.8.3 ## New features - A macro for deduplicating data ([#335](https://github.com/dbt-labs/dbt-utils/issues/335), [#512](https://github.com/dbt-labs/dbt-utils/pull/512)) diff --git a/README.md b/README.md index ae922a1f..acc7c825 100644 --- a/README.md +++ b/README.md @@ -383,7 +383,6 @@ models: Additional `gaps` and `zero_length_range_allowed` examples **Understanding the `gaps` argument:** - Here are a number of examples for each allowed `gaps` argument. * `gaps: not_allowed`: The upper bound of one record must be the lower bound of the next record. @@ -787,7 +786,7 @@ from {{ ref('my_model') }} This macro unions together an array of [Relations](https://docs.getdbt.com/docs/writing-code-in-dbt/class-reference/#relation), even when columns have differing orders in each Relation, and/or some columns are missing from some relations. Any columns exclusive to a subset of these -relations will be filled with `null` where not present. An new column +relations will be filled with `null` where not present. A new column (`_dbt_source_relation`) is also added to indicate the source for each record. **Usage:** diff --git a/macros/sql/get_relations_by_pattern.sql b/macros/sql/get_relations_by_pattern.sql index 9325a883..aed8bd60 100644 --- a/macros/sql/get_relations_by_pattern.sql +++ b/macros/sql/get_relations_by_pattern.sql @@ -19,7 +19,7 @@ database=database, schema=row.table_schema, identifier=row.table_name, - type=row.table_type + type = row.table_type ) -%} {%- do tbl_relations.append(tbl_relation) -%} {%- endfor -%} diff --git a/macros/sql/get_table_types_sql.sql b/macros/sql/get_table_types_sql.sql index e3f86884..c16275e9 100644 --- a/macros/sql/get_table_types_sql.sql +++ b/macros/sql/get_table_types_sql.sql @@ -8,7 +8,7 @@ when 'EXTERNAL TABLE' then 'external' when 'MATERIALIZED VIEW' then 'materializedview' else lower(table_type) - end as "table_type" + end as table_type {% endmacro %} diff --git a/macros/sql/get_tables_by_pattern_sql.sql b/macros/sql/get_tables_by_pattern_sql.sql index 93f3c6a6..4d5a8fc9 100644 --- a/macros/sql/get_tables_by_pattern_sql.sql +++ b/macros/sql/get_tables_by_pattern_sql.sql @@ -30,10 +30,7 @@ select distinct table_schema, table_name, - case table_type - when 'BASE TABLE' then 'table' - else lower(table_type) - end as table_type + {{ dbt_utils.get_table_types_sql() }} from {{ adapter.quote(database) }}.{{ schema }}.INFORMATION_SCHEMA.TABLES where lower(table_name) like lower ('{{ table_pattern }}') From 96c461b65822440f173ab1a68f1302f96e99aed2 Mon Sep 17 00:00:00 2001 From: Judah Rand <17158624+judahrand@users.noreply.github.com> Date: Fri, 25 Mar 2022 03:52:16 +0000 Subject: [PATCH 19/25] Add `alias` argument to `deduplicate` macro (#526) * Add `alias` argument to `deduplicate * Test `alias` argument * Rename `alias` to `relation_alias` --- README.md | 5 +++-- .../data/sql/data_deduplicate.csv | 1 + .../models/sql/test_deduplicate.sql | 19 +++++++++++++++++-- macros/sql/deduplicate.sql | 12 
++++++------ 4 files changed, 27 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index acc7c825..80c6cbbb 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ For compatibility details between versions of dbt-core and dbt-utils, [see this - [SQL generators](#sql-generators) - [date_spine](#date_spine-source) - - [dedupe](#dedupe-source) + - [deduplicate](#deduplicate) - [haversine_distance](#haversine_distance-source) - [group_by](#group_by-source) - [star](#star-source) @@ -715,7 +715,8 @@ This macro returns the sql required to remove duplicate rows from a model or sou {{ dbt_utils.deduplicate( relation=source('my_source', 'my_table'), group_by="user_id, cast(timestamp as day)", - order_by="timestamp desc" + order_by="timestamp desc", + relation_alias="my_cte" ) }} ``` diff --git a/integration_tests/data/sql/data_deduplicate.csv b/integration_tests/data/sql/data_deduplicate.csv index c3ae0c4d..7e06170a 100644 --- a/integration_tests/data/sql/data_deduplicate.csv +++ b/integration_tests/data/sql/data_deduplicate.csv @@ -1,3 +1,4 @@ user_id,event,version 1,play,1 1,play,2 +2,pause,1 diff --git a/integration_tests/models/sql/test_deduplicate.sql b/integration_tests/models/sql/test_deduplicate.sql index 7df79261..81fe81e7 100644 --- a/integration_tests/models/sql/test_deduplicate.sql +++ b/integration_tests/models/sql/test_deduplicate.sql @@ -1,6 +1,21 @@ -with deduped as ( +with - {{ dbt_utils.deduplicate(ref('data_deduplicate'), group_by='user_id', order_by='version desc') | indent }} +source as ( + select * + from {{ ref('data_deduplicate') }} + where user_id = 1 +), + +deduped as ( + + {{ + dbt_utils.deduplicate( + ref('data_deduplicate'), + group_by='user_id', + order_by='version desc', + relation_alias="source" + ) | indent + }} ) diff --git a/macros/sql/deduplicate.sql b/macros/sql/deduplicate.sql index f5d65534..9a3571a2 100644 --- a/macros/sql/deduplicate.sql +++ b/macros/sql/deduplicate.sql @@ -1,8 +1,8 @@ -{%- macro deduplicate(relation, group_by, order_by=none) -%} - {{ return(adapter.dispatch('deduplicate', 'dbt_utils')(relation, group_by, order_by=order_by)) }} +{%- macro deduplicate(relation, group_by, order_by=none, relation_alias=none) -%} + {{ return(adapter.dispatch('deduplicate', 'dbt_utils')(relation, group_by, order_by=order_by, relation_alias=relation_alias)) }} {% endmacro %} -{%- macro default__deduplicate(relation, group_by, order_by=none) -%} +{%- macro default__deduplicate(relation, group_by, order_by=none, relation_alias=none) -%} select {{ dbt_utils.star(relation, relation_alias='deduped') | indent }} @@ -15,7 +15,7 @@ order by {{ order_by }} {%- endif %} ) as rn - from {{ relation }} as _inner + from {{ relation if relation_alias is none else relation_alias }} as _inner ) as deduped where deduped.rn = 1 @@ -26,7 +26,7 @@ -- clause in BigQuery: -- https://github.com/dbt-labs/dbt-utils/issues/335#issuecomment-788157572 #} -{%- macro bigquery__deduplicate(relation, group_by, order_by=none) -%} +{%- macro bigquery__deduplicate(relation, group_by, order_by=none, relation_alias=none) -%} select {{ dbt_utils.star(relation, relation_alias='deduped') | indent }} @@ -39,7 +39,7 @@ {%- endif %} limit 1 )[offset(0)] as deduped - from {{ relation }} as original + from {{ relation if relation_alias is none else relation_alias }} as original group by {{ group_by }} ) From 839a4fc0024aec690cca5df1aa2bfb1637b36b29 Mon Sep 17 00:00:00 2001 From: Brid Moynihan Date: Mon, 28 Mar 2022 03:20:57 +0100 Subject: [PATCH 20/25] Fix/use generic test naming style 
instead of schema test (#521) * Updated Rreferences to 'schema test' in README along with small improvements to test descriptions. Updates were also carried out in folder structure and integration README * Updated references to 'schema test' in Changelog * updated changelog with changes to documentation and fproject file structure * Apply suggestions from code review Update macro descriptions to be "asserts that" * Update CHANGELOG.md * Update README.md Co-authored-by: Joel Labes --- CHANGELOG.md | 21 ++--- README.md | 78 ++++++++++--------- integration_tests/README.md | 4 +- .../models/datetime/test_date_spine.sql | 2 +- .../schema.yml | 0 .../test_equal_column_subset.sql | 0 .../test_equal_rowcount.sql | 0 .../test_fewer_rows_than.sql | 0 .../test_recency.sql | 0 .../models/sql/test_generate_series.sql | 2 +- .../accepted_range.sql | 0 .../at_least_one.sql | 0 .../cardinality_equality.sql | 0 .../equal_rowcount.sql | 0 .../equality.sql | 0 .../expression_is_true.sql | 0 .../fewer_rows_than.sql | 0 .../mutually_exclusive_ranges.sql | 0 .../not_accepted_values.sql | 0 .../not_constant.sql | 0 .../not_null_proportion.sql | 0 .../recency.sql | 0 .../relationships_where.sql | 0 .../sequential_values.sql | 0 .../test_not_null_where.sql | 0 .../test_unique_where.sql | 0 .../unique_combination_of_columns.sql | 0 27 files changed, 57 insertions(+), 50 deletions(-) rename integration_tests/models/{schema_tests => generic_tests}/schema.yml (100%) rename integration_tests/models/{schema_tests => generic_tests}/test_equal_column_subset.sql (100%) rename integration_tests/models/{schema_tests => generic_tests}/test_equal_rowcount.sql (100%) rename integration_tests/models/{schema_tests => generic_tests}/test_fewer_rows_than.sql (100%) rename integration_tests/models/{schema_tests => generic_tests}/test_recency.sql (100%) rename macros/{schema_tests => generic_tests}/accepted_range.sql (100%) rename macros/{schema_tests => generic_tests}/at_least_one.sql (100%) rename macros/{schema_tests => generic_tests}/cardinality_equality.sql (100%) rename macros/{schema_tests => generic_tests}/equal_rowcount.sql (100%) rename macros/{schema_tests => generic_tests}/equality.sql (100%) rename macros/{schema_tests => generic_tests}/expression_is_true.sql (100%) rename macros/{schema_tests => generic_tests}/fewer_rows_than.sql (100%) rename macros/{schema_tests => generic_tests}/mutually_exclusive_ranges.sql (100%) rename macros/{schema_tests => generic_tests}/not_accepted_values.sql (100%) rename macros/{schema_tests => generic_tests}/not_constant.sql (100%) rename macros/{schema_tests => generic_tests}/not_null_proportion.sql (100%) rename macros/{schema_tests => generic_tests}/recency.sql (100%) rename macros/{schema_tests => generic_tests}/relationships_where.sql (100%) rename macros/{schema_tests => generic_tests}/sequential_values.sql (100%) rename macros/{schema_tests => generic_tests}/test_not_null_where.sql (100%) rename macros/{schema_tests => generic_tests}/test_unique_where.sql (100%) rename macros/{schema_tests => generic_tests}/unique_combination_of_columns.sql (100%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bb4df53..366a4964 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,14 +1,15 @@ - # dbt-utils v0.8.3 ## New features - A macro for deduplicating data ([#335](https://github.com/dbt-labs/dbt-utils/issues/335), [#512](https://github.com/dbt-labs/dbt-utils/pull/512)) +## Quality of life +- Updated references to 'schema test' in project file structure and documentation referred to in 
[#485](https://github.com/dbt-labs/dbt-utils/issues/485) + # dbt-utils v0.8.2 ## Fixes - Fix union_relations error from [#473](https://github.com/dbt-labs/dbt-utils/pull/473) when no include/exclude parameters are provided ([#505](https://github.com/dbt-labs/dbt-utils/issues/505), [#509](https://github.com/dbt-labs/dbt-utils/pull/509)) # dbt-utils v0.8.1 - ## New features - A cross-database implementation of `any_value()` ([#497](https://github.com/dbt-labs/dbt-utils/issues/497), [#501](https://github.com/dbt-labs/dbt-utils/pull/501)) - A cross-database implementation of `bool_or()` ([#504](https://github.com/dbt-labs/dbt-utils/pull/504)) @@ -92,12 +93,12 @@ ## Features -- Add `not_null_proportion` schema test that allows the user to specify the minimum (`at_least`) tolerated proportion (e.g., `0.95`) of non-null values ([#411](https://github.com/dbt-labs/dbt-utils/pull/411)) +- Add `not_null_proportion` generic test that allows the user to specify the minimum (`at_least`) tolerated proportion (e.g., `0.95`) of non-null values ([#411](https://github.com/dbt-labs/dbt-utils/pull/411)) ## Under the hood - Allow user to provide any case type when defining the `exclude` argument in `dbt_utils.star()` ([#403](https://github.com/dbt-labs/dbt-utils/pull/403)) -- Log whole row instead of just column name in 'accepted_range' schema test to allow better visibility into failures ([#413](https://github.com/dbt-labs/dbt-utils/pull/413)) +- Log whole row instead of just column name in 'accepted_range' generic test to allow better visibility into failures ([#413](https://github.com/dbt-labs/dbt-utils/pull/413)) - Use column name to group in 'get_column_values ' to allow better cross db functionality ([#407](https://github.com/dbt-labs/dbt-utils/pull/407)) # dbt-utils v0.7.1 @@ -154,7 +155,7 @@ If you were relying on the position to match up your optional arguments, this ma ## Features * Add new argument, `order_by`, to `get_column_values` (code originally in [#289](https://github.com/fishtown-analytics/dbt-utils/pull/289/) from [@clausherther](https://github.com/clausherther), merged via [#349](https://github.com/fishtown-analytics/dbt-utils/pull/349/)) * Add `slugify` macro, and use it in the pivot macro. :rotating_light: This macro uses the `re` module, which is only available in dbt v0.19.0+. As a result, this feature introduces a breaking change. ([#314](https://github.com/fishtown-analytics/dbt-utils/pull/314)) -* Add `not_null_proportion` schema test that allows the user to specify the minimum (`at_least`) tolerated proportion (e.g., `0.95`) of non-null values +* Add `not_null_proportion` generic test that allows the user to specify the minimum (`at_least`) tolerated proportion (e.g., `0.95`) of non-null values ## Under the hood * Update the default implementation of concat macro to use `||` operator ([#373](https://github.com/fishtown-analytics/dbt-utils/pull/314) from [@ChristopheDuong](https://github.com/ChristopheDuong)). Note this may be a breaking change for adapters that support `concat()` but not `||`, such as Apache Spark. @@ -165,18 +166,18 @@ If you were relying on the position to match up your optional arguments, this ma ## Fixes -- make `sequential_values` schema test use `dbt_utils.type_timestamp()` to allow for compatibility with db's without timestamp data type. 
[#376](https://github.com/fishtown-analytics/dbt-utils/pull/376) from [@swanderz](https://github.com/swanderz) +- make `sequential_values` generic test use `dbt_utils.type_timestamp()` to allow for compatibility with db's without timestamp data type. [#376](https://github.com/fishtown-analytics/dbt-utils/pull/376) from [@swanderz](https://github.com/swanderz) # dbt-utils v0.6.5 ## Features * Add new `accepted_range` test ([#276](https://github.com/fishtown-analytics/dbt-utils/pull/276) [@joellabes](https://github.com/joellabes)) * Make `expression_is_true` work as a column test (code originally in [#226](https://github.com/fishtown-analytics/dbt-utils/pull/226/) from [@elliottohara](https://github.com/elliottohara), merged via [#313](https://github.com/fishtown-analytics/dbt-utils/pull/313/)) -* Add new schema test, `not_accepted_values` ([#284](https://github.com/fishtown-analytics/dbt-utils/pull/284) [@JavierMonton](https://github.com/JavierMonton)) +* Add new generic test, `not_accepted_values` ([#284](https://github.com/fishtown-analytics/dbt-utils/pull/284) [@JavierMonton](https://github.com/JavierMonton)) * Support a new argument, `zero_length_range_allowed` in the `mutually_exclusive_ranges` test ([#307](https://github.com/fishtown-analytics/dbt-utils/pull/307) [@zemekeneng](https://github.com/zemekeneng)) -* Add new schema test, `sequential_values` ([#318](https://github.com/fishtown-analytics/dbt-utils/pull/318), inspired by [@hundredwatt](https://github.com/hundredwatt)) +* Add new generic test, `sequential_values` ([#318](https://github.com/fishtown-analytics/dbt-utils/pull/318), inspired by [@hundredwatt](https://github.com/hundredwatt)) * Support `quarter` in the `postgres__last_day` macro ([#333](https://github.com/fishtown-analytics/dbt-utils/pull/333/files) [@seunghanhong](https://github.com/seunghanhong)) * Add new argument, `unit`, to `haversine_distance` ([#340](https://github.com/fishtown-analytics/dbt-utils/pull/340) [@bastienboutonnet](https://github.com/bastienboutonnet)) -* Add new schema test, `fewer_rows_than` (code originally in [#221](https://github.com/fishtown-analytics/dbt-utils/pull/230/) from [@dmarts](https://github.com/dmarts), merged via [#343](https://github.com/fishtown-analytics/dbt-utils/pull/343/)) +* Add new generic test, `fewer_rows_than` (code originally in [#221](https://github.com/fishtown-analytics/dbt-utils/pull/230/) from [@dmarts](https://github.com/dmarts), merged via [#343](https://github.com/fishtown-analytics/dbt-utils/pull/343/)) ## Fixes * Handle booleans gracefully in the unpivot macro ([#305](https://github.com/fishtown-analytics/dbt-utils/pull/305) [@avishalom](https://github.com/avishalom)) @@ -250,7 +251,7 @@ enabling users of community-supported database plugins to add or override macro specific to their database ([#267](https://github.com/fishtown-analytics/dbt-utils/pull/267)) * Use `add_ephemeral_prefix` instead of hard-coding a string literal, to support database adapters that use different prefixes ([#267](https://github.com/fishtown-analytics/dbt-utils/pull/267)) -* Implement a quote_columns argument in the unique_combination_of_columns schema test ([#270](https://github.com/fishtown-analytics/dbt-utils/pull/270) [@JoshuaHuntley](https://github.com/JoshuaHuntley)) +* Implement a quote_columns argument in the unique_combination_of_columns generic test ([#270](https://github.com/fishtown-analytics/dbt-utils/pull/270) [@JoshuaHuntley](https://github.com/JoshuaHuntley)) ## Quality of life * Remove deprecated macros 
`get_tables_by_prefix` and `union_tables` ([#268](https://github.com/fishtown-analytics/dbt-utils/pull/268)) diff --git a/README.md b/README.md index 80c6cbbb..c9caa8cc 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ For compatibility details between versions of dbt-core and dbt-utils, [see this ---- ## Contents -**[Schema tests](#schema-tests)** +**[Generic tests](#generic-tests)** - [equal_rowcount](#equal_rowcount-source) - [fewer_rows_than](#fewer_rows_than-source) - [equality](#equality-source) @@ -69,9 +69,9 @@ For compatibility details between versions of dbt-core and dbt-utils, [see this - [insert_by_period](#insert_by_period-source) ---- -### Schema Tests -#### equal_rowcount ([source](macros/schema_tests/equal_rowcount.sql)) -This schema test asserts the that two relations have the same number of rows. +### Generic Tests +#### equal_rowcount ([source](macros/generic_tests/equal_rowcount.sql)) +Asserts that two relations have the same number of rows. **Usage:** ```yaml @@ -85,8 +85,8 @@ models: ``` -#### fewer_rows_than ([source](macros/schema_tests/fewer_rows_than.sql)) -This schema test asserts that this model has fewer rows than the referenced model. +#### fewer_rows_than ([source](macros/generic_tests/fewer_rows_than.sql)) +Asserts that the respective model has fewer rows than the model being compared. Usage: ```yaml @@ -99,8 +99,8 @@ models: compare_model: ref('other_table_name') ``` -#### equality ([source](macros/schema_tests/equality.sql)) -This schema test asserts the equality of two relations. Optionally specify a subset of columns to compare. +#### equality ([source](macros/generic_tests/equality.sql)) +Asserts the equality of two relations. Optionally specify a subset of columns to compare. **Usage:** ```yaml @@ -116,8 +116,13 @@ models: - second_column ``` -#### expression_is_true ([source](macros/schema_tests/expression_is_true.sql)) -This schema test asserts that a valid sql expression is true for all records. This is useful when checking integrity across columns, for example, that a total is equal to the sum of its parts, or that at least one column is true. +#### expression_is_true ([source](macros/generic_tests/expression_is_true.sql)) +Asserts that a valid SQL expression is true for all records. This is useful when checking integrity across columns. +Examples: + +- Verify an outcome based on the application of basic alegbraic operations between columns. +- Verify the length of a column. +- Verify the truth value of a column. **Usage:** ```yaml @@ -164,8 +169,8 @@ models: condition: col_a = 1 ``` -#### recency ([source](macros/schema_tests/recency.sql)) -This schema test asserts that there is data in the referenced model at least as recent as the defined interval prior to the current timestamp. +#### recency ([source](macros/generic_tests/recency.sql)) +Asserts that a timestamp column in the reference model contains data that is at least as recent as the defined date interval. **Usage:** ```yaml @@ -180,8 +185,8 @@ models: interval: 1 ``` -#### at_least_one ([source](macros/schema_tests/at_least_one.sql)) -This schema test asserts if column has at least one value. +#### at_least_one ([source](macros/generic_tests/at_least_one.sql)) +Asserts that a column has at least one value. **Usage:** ```yaml @@ -195,8 +200,8 @@ models: - dbt_utils.at_least_one ``` -#### not_constant ([source](macros/schema_tests/not_constant.sql)) -This schema test asserts if column does not have same value in all rows. 
+#### not_constant ([source](macros/generic_tests/not_constant.sql)) +Asserts that a column does not have the same value in all rows. **Usage:** ```yaml @@ -210,8 +215,8 @@ models: - dbt_utils.not_constant ``` -#### cardinality_equality ([source](macros/schema_tests/cardinality_equality.sql)) -This schema test asserts if values in a given column have exactly the same cardinality as values from a different column in a different model. +#### cardinality_equality ([source](macros/generic_tests/cardinality_equality.sql)) +Asserts that values in a given column have exactly the same cardinality as values from a different column in a different model. **Usage:** ```yaml @@ -227,8 +232,8 @@ models: to: ref('other_model_name') ``` -#### unique_where ([source](macros/schema_tests/test_unique_where.sql)) -This test validates that there are no duplicate values present in a field for a subset of rows by specifying a `where` clause. +#### unique_where ([source](macros/generic_tests/test_unique_where.sql)) +Asserts that there are no duplicate values present in a field for a subset of rows by specifying a `where` clause. *Warning*: This test is no longer supported. Starting in dbt v0.20.0, the built-in `unique` test supports a `where` config. [See the dbt docs for more details](https://docs.getdbt.com/reference/resource-configs/where). @@ -245,8 +250,8 @@ models: where: "_deleted = false" ``` -#### not_null_where ([source](macros/schema_tests/test_not_null_where.sql)) -This test validates that there are no null values present in a column for a subset of rows by specifying a `where` clause. +#### not_null_where ([source](macros/generic_tests/test_not_null_where.sql)) +Asserts that there are no null values present in a column for a subset of rows by specifying a `where` clause. *Warning*: This test is no longer supported. Starting in dbt v0.20.0, the built-in `not_null` test supports a `where` config. [See the dbt docs for more details](https://docs.getdbt.com/reference/resource-configs/where). @@ -263,8 +268,8 @@ models: where: "_deleted = false" ``` -#### not_null_proportion ([source](macros/schema_tests/not_null_proportion.sql)) -This test validates that the proportion of non-null values present in a column is between a specified range [`at_least`, `at_most`] where `at_most` is an optional argument (default: `1.0`). +#### not_null_proportion ([source](macros/generic_tests/not_null_proportion.sql)) +Asserts that the proportion of non-null values present in a column is between a specified range [`at_least`, `at_most`] where `at_most` is an optional argument (default: `1.0`). **Usage:** ```yaml @@ -279,8 +284,8 @@ models: at_least: 0.95 ``` -#### not_accepted_values ([source](macros/schema_tests/not_accepted_values.sql)) -This test validates that there are no rows that match the given values. +#### not_accepted_values ([source](macros/generic_tests/not_accepted_values.sql)) +Asserts that there are no rows that match the given values. Usage: ```yaml @@ -295,8 +300,8 @@ models: values: ['Barcelona', 'New York'] ``` -#### relationships_where ([source](macros/schema_tests/relationships_where.sql)) -This test validates the referential integrity between two relations (same as the core relationships schema test) with an added predicate to filter out some rows from the test. This is useful to exclude records such as test entities, rows created in the last X minutes/hours to account for temporary gaps due to ETL limitations, etc. 
+#### relationships_where ([source](macros/generic_tests/relationships_where.sql)) +Asserts the referential integrity between two relations (same as the core relationships assertions) with an added predicate to filter out some rows from the test. This is useful to exclude records such as test entities, rows created in the last X minutes/hours to account for temporary gaps due to ETL limitations, etc. **Usage:** ```yaml @@ -314,9 +319,9 @@ models: to_condition: created_date >= '2020-01-01' ``` -#### mutually_exclusive_ranges ([source](macros/schema_tests/mutually_exclusive_ranges.sql)) -This test confirms that for a given lower_bound_column and upper_bound_column, -the ranges of between the lower and upper bounds do not overlap with the ranges +#### mutually_exclusive_ranges ([source](macros/generic_tests/mutually_exclusive_ranges.sql)) +Asserts that for a given lower_bound_column and upper_bound_column, +the ranges between the lower and upper bounds do not overlap with the ranges of another row. **Usage:** @@ -383,6 +388,7 @@ models: Additional `gaps` and `zero_length_range_allowed` examples **Understanding the `gaps` argument:** + Here are a number of examples for each allowed `gaps` argument. * `gaps: not_allowed`: The upper bound of one record must be the lower bound of the next record. @@ -431,7 +437,7 @@ models: -#### sequential_values ([source](macros/schema_tests/sequential_values.sql)) +#### sequential_values ([source](macros/generic_tests/sequential_values.sql)) This test confirms that a column contains sequential values. It can be used for both numeric values, and datetime values, as follows: ```yml @@ -459,8 +465,8 @@ seeds: * `interval` (default=1): The gap between two sequential values * `datepart` (default=None): Used when the gaps are a unit of time. If omitted, the test will check for a numeric gap. -#### unique_combination_of_columns ([source](macros/schema_tests/unique_combination_of_columns.sql)) -This test confirms that the combination of columns is unique. For example, the +#### unique_combination_of_columns ([source](macros/generic_tests/unique_combination_of_columns.sql)) +Asserts that the combination of columns is unique. For example, the combination of month and product is unique, however neither column is unique in isolation. @@ -495,8 +501,8 @@ An optional `quote_columns` argument (`default=false`) can also be used if a col ``` -#### accepted_range ([source](macros/schema_tests/accepted_range.sql)) -This test checks that a column's values fall inside an expected range. Any combination of `min_value` and `max_value` is allowed, and the range can be inclusive or exclusive. Provide a `where` argument to filter to specific records only. +#### accepted_range ([source](macros/generic_tests/accepted_range.sql)) +Asserts that a column's values fall inside an expected range. Any combination of `min_value` and `max_value` is allowed, and the range can be inclusive or exclusive. Provide a `where` argument to filter to specific records only. In addition to comparisons to a scalar value, you can also compare to another column's values. Any data type that supports the `>` or `<` operators can be compared, so you could also run tests like checking that all order dates are in the past. 
diff --git a/integration_tests/README.md b/integration_tests/README.md index 243af411..4f9f0131 100644 --- a/integration_tests/README.md +++ b/integration_tests/README.md @@ -26,14 +26,14 @@ Where possible, targets are being run in docker containers (this works for Postg ### Creating a new integration test -This directory contains an example dbt project which tests the macros in the `dbt-utils` package. An integration test typically involves making 1) a new seed file 2) a new model file 3) a schema test. +This directory contains an example dbt project which tests the macros in the `dbt-utils` package. An integration test typically involves making 1) a new seed file 2) a new model file 3) a generic test to assert anticipated behaviour. For an example integration tests, check out the tests for the `get_url_parameter` macro: 1. [Macro definition](https://github.com/fishtown-analytics/dbt-utils/blob/master/macros/web/get_url_parameter.sql) 2. [Seed file with fake data](https://github.com/fishtown-analytics/dbt-utils/blob/master/integration_tests/data/web/data_urls.csv) 3. [Model to test the macro](https://github.com/fishtown-analytics/dbt-utils/blob/master/integration_tests/models/web/test_urls.sql) -4. [A schema test to assert the macro works as expected](https://github.com/fishtown-analytics/dbt-utils/blob/master/integration_tests/models/web/schema.yml#L2) +4. [A generic test to assert the macro works as expected](https://github.com/fishtown-analytics/dbt-utils/blob/master/integration_tests/models/web/schema.yml#L2) Once you've added all of these files, you should be able to run: diff --git a/integration_tests/models/datetime/test_date_spine.sql b/integration_tests/models/datetime/test_date_spine.sql index 93cd07f1..fa4ae52b 100644 --- a/integration_tests/models/datetime/test_date_spine.sql +++ b/integration_tests/models/datetime/test_date_spine.sql @@ -1,6 +1,6 @@ -- snowflake doesn't like this as a view because the `generate_series` --- call creates a CTE called `unioned`, as does the `equality` schema test. +-- call creates a CTE called `unioned`, as does the `equality` generic test. -- Ideally, Snowflake would be smart enough to know that these CTE names are -- different, as they live in different relations. 
TODO: use a less common cte name diff --git a/integration_tests/models/schema_tests/schema.yml b/integration_tests/models/generic_tests/schema.yml similarity index 100% rename from integration_tests/models/schema_tests/schema.yml rename to integration_tests/models/generic_tests/schema.yml diff --git a/integration_tests/models/schema_tests/test_equal_column_subset.sql b/integration_tests/models/generic_tests/test_equal_column_subset.sql similarity index 100% rename from integration_tests/models/schema_tests/test_equal_column_subset.sql rename to integration_tests/models/generic_tests/test_equal_column_subset.sql diff --git a/integration_tests/models/schema_tests/test_equal_rowcount.sql b/integration_tests/models/generic_tests/test_equal_rowcount.sql similarity index 100% rename from integration_tests/models/schema_tests/test_equal_rowcount.sql rename to integration_tests/models/generic_tests/test_equal_rowcount.sql diff --git a/integration_tests/models/schema_tests/test_fewer_rows_than.sql b/integration_tests/models/generic_tests/test_fewer_rows_than.sql similarity index 100% rename from integration_tests/models/schema_tests/test_fewer_rows_than.sql rename to integration_tests/models/generic_tests/test_fewer_rows_than.sql diff --git a/integration_tests/models/schema_tests/test_recency.sql b/integration_tests/models/generic_tests/test_recency.sql similarity index 100% rename from integration_tests/models/schema_tests/test_recency.sql rename to integration_tests/models/generic_tests/test_recency.sql diff --git a/integration_tests/models/sql/test_generate_series.sql b/integration_tests/models/sql/test_generate_series.sql index a943cf6c..11370b7b 100644 --- a/integration_tests/models/sql/test_generate_series.sql +++ b/integration_tests/models/sql/test_generate_series.sql @@ -1,6 +1,6 @@ -- snowflake doesn't like this as a view because the `generate_series` --- call creates a CTE called `unioned`, as does the `equality` schema test. +-- call creates a CTE called `unioned`, as does the `equality` generic test. -- Ideally, Snowflake would be smart enough to know that these CTE names are -- different, as they live in different relations. 
TODO: use a less common cte name diff --git a/macros/schema_tests/accepted_range.sql b/macros/generic_tests/accepted_range.sql similarity index 100% rename from macros/schema_tests/accepted_range.sql rename to macros/generic_tests/accepted_range.sql diff --git a/macros/schema_tests/at_least_one.sql b/macros/generic_tests/at_least_one.sql similarity index 100% rename from macros/schema_tests/at_least_one.sql rename to macros/generic_tests/at_least_one.sql diff --git a/macros/schema_tests/cardinality_equality.sql b/macros/generic_tests/cardinality_equality.sql similarity index 100% rename from macros/schema_tests/cardinality_equality.sql rename to macros/generic_tests/cardinality_equality.sql diff --git a/macros/schema_tests/equal_rowcount.sql b/macros/generic_tests/equal_rowcount.sql similarity index 100% rename from macros/schema_tests/equal_rowcount.sql rename to macros/generic_tests/equal_rowcount.sql diff --git a/macros/schema_tests/equality.sql b/macros/generic_tests/equality.sql similarity index 100% rename from macros/schema_tests/equality.sql rename to macros/generic_tests/equality.sql diff --git a/macros/schema_tests/expression_is_true.sql b/macros/generic_tests/expression_is_true.sql similarity index 100% rename from macros/schema_tests/expression_is_true.sql rename to macros/generic_tests/expression_is_true.sql diff --git a/macros/schema_tests/fewer_rows_than.sql b/macros/generic_tests/fewer_rows_than.sql similarity index 100% rename from macros/schema_tests/fewer_rows_than.sql rename to macros/generic_tests/fewer_rows_than.sql diff --git a/macros/schema_tests/mutually_exclusive_ranges.sql b/macros/generic_tests/mutually_exclusive_ranges.sql similarity index 100% rename from macros/schema_tests/mutually_exclusive_ranges.sql rename to macros/generic_tests/mutually_exclusive_ranges.sql diff --git a/macros/schema_tests/not_accepted_values.sql b/macros/generic_tests/not_accepted_values.sql similarity index 100% rename from macros/schema_tests/not_accepted_values.sql rename to macros/generic_tests/not_accepted_values.sql diff --git a/macros/schema_tests/not_constant.sql b/macros/generic_tests/not_constant.sql similarity index 100% rename from macros/schema_tests/not_constant.sql rename to macros/generic_tests/not_constant.sql diff --git a/macros/schema_tests/not_null_proportion.sql b/macros/generic_tests/not_null_proportion.sql similarity index 100% rename from macros/schema_tests/not_null_proportion.sql rename to macros/generic_tests/not_null_proportion.sql diff --git a/macros/schema_tests/recency.sql b/macros/generic_tests/recency.sql similarity index 100% rename from macros/schema_tests/recency.sql rename to macros/generic_tests/recency.sql diff --git a/macros/schema_tests/relationships_where.sql b/macros/generic_tests/relationships_where.sql similarity index 100% rename from macros/schema_tests/relationships_where.sql rename to macros/generic_tests/relationships_where.sql diff --git a/macros/schema_tests/sequential_values.sql b/macros/generic_tests/sequential_values.sql similarity index 100% rename from macros/schema_tests/sequential_values.sql rename to macros/generic_tests/sequential_values.sql diff --git a/macros/schema_tests/test_not_null_where.sql b/macros/generic_tests/test_not_null_where.sql similarity index 100% rename from macros/schema_tests/test_not_null_where.sql rename to macros/generic_tests/test_not_null_where.sql diff --git a/macros/schema_tests/test_unique_where.sql b/macros/generic_tests/test_unique_where.sql similarity index 100% rename from 
macros/schema_tests/test_unique_where.sql rename to macros/generic_tests/test_unique_where.sql diff --git a/macros/schema_tests/unique_combination_of_columns.sql b/macros/generic_tests/unique_combination_of_columns.sql similarity index 100% rename from macros/schema_tests/unique_combination_of_columns.sql rename to macros/generic_tests/unique_combination_of_columns.sql From 31577cb51dba5f6da06d1ccdb7cc6b4658107392 Mon Sep 17 00:00:00 2001 From: SunriseLong <44146580+SunriseLong@users.noreply.github.com> Date: Tue, 29 Mar 2022 00:03:33 -0400 Subject: [PATCH 21/25] Remove extraneous whitespace (#529) * rm whitespace from date_trunc * datediff * rm uncessary whitespace control * change log * fix CHANGELOG * address comments --- CHANGELOG.md | 2 ++ README.md | 1 + macros/cross_db_utils/date_trunc.sql | 8 ++++---- macros/cross_db_utils/datediff.sql | 16 ++++++++-------- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 366a4964..da3399cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ - `star()` will only alias columns if a prefix/suffix is provided, to allow the unmodified output to still be used in `group by` clauses etc. [#468](https://github.com/dbt-labs/dbt-utils/pull/468) - The `sequential_values` test is now compatible with quoted columns [#479](https://github.com/dbt-labs/dbt-utils/pull/479) - `pivot()` escapes values containing apostrophes [#503](https://github.com/dbt-labs/dbt-utils/pull/503) +- `date_trunc` and `datediff` default macros now have whitespace control to assist with linting and readability [#529](https://github.com/dbt-labs/dbt-utils/pull/529) ## Contributors: - [grahamwetzler](https://github.com/grahamwetzler) (#473) @@ -34,6 +35,7 @@ - [jelstongreen](https://github.com/jelstongreen) (#468) - [armandduijn](https://github.com/armandduijn) (#479) - [mdutoo](https://github.com/mdutoo) (#503) +- [sunriselong](https://github.com/sunriselong) (#529) # dbt-utils v0.8.0 diff --git a/README.md b/README.md index c9caa8cc..581f63ba 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,7 @@ For compatibility details between versions of dbt-core and dbt-utils, [see this - [insert_by_period](#insert_by_period-source) ---- +======= ### Generic Tests #### equal_rowcount ([source](macros/generic_tests/equal_rowcount.sql)) Asserts that two relations have the same number of rows. 
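As context for the whitespace-control change below, a brief sketch of what the Jinja `-` markers do to rendered SQL; the `padded` and `trimmed` macros here are illustrative only and are not part of the package:

```sql
-- plain tags keep the newlines that surround the macro body, so
-- `select {{ padded() }}` renders with stray blank lines around current_date
{% macro padded() %}
current_date
{% endmacro %}

-- `-%}` and `{%-` strip the adjacent whitespace, so
-- `select {{ trimmed() }}` renders on a single line: select current_date
{% macro trimmed() -%}
current_date
{%- endmacro %}
```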
diff --git a/macros/cross_db_utils/date_trunc.sql b/macros/cross_db_utils/date_trunc.sql index cba3346b..f9d0364b 100644 --- a/macros/cross_db_utils/date_trunc.sql +++ b/macros/cross_db_utils/date_trunc.sql @@ -2,14 +2,14 @@ {{ return(adapter.dispatch('date_trunc', 'dbt_utils') (datepart, date)) }} {%- endmacro %} -{% macro default__date_trunc(datepart, date) %} +{% macro default__date_trunc(datepart, date) -%} date_trunc('{{datepart}}', {{date}}) -{% endmacro %} +{%- endmacro %} -{% macro bigquery__date_trunc(datepart, date) %} +{% macro bigquery__date_trunc(datepart, date) -%} timestamp_trunc( cast({{date}} as timestamp), {{datepart}} ) -{% endmacro %} +{%- endmacro %} diff --git a/macros/cross_db_utils/datediff.sql b/macros/cross_db_utils/datediff.sql index 42dd738e..2b5d6613 100644 --- a/macros/cross_db_utils/datediff.sql +++ b/macros/cross_db_utils/datediff.sql @@ -3,7 +3,7 @@ {% endmacro %} -{% macro default__datediff(first_date, second_date, datepart) %} +{% macro default__datediff(first_date, second_date, datepart) -%} datediff( {{ datepart }}, @@ -11,10 +11,10 @@ {{ second_date }} ) -{% endmacro %} +{%- endmacro %} -{% macro bigquery__datediff(first_date, second_date, datepart) %} +{% macro bigquery__datediff(first_date, second_date, datepart) -%} datetime_diff( cast({{second_date}} as datetime), @@ -22,9 +22,9 @@ {{datepart}} ) -{% endmacro %} +{%- endmacro %} -{% macro postgres__datediff(first_date, second_date, datepart) %} +{% macro postgres__datediff(first_date, second_date, datepart) -%} {% if datepart == 'year' %} (date_part('year', ({{second_date}})::date) - date_part('year', ({{first_date}})::date)) @@ -55,12 +55,12 @@ {{ exceptions.raise_compiler_error("Unsupported datepart for macro datediff in postgres: {!r}".format(datepart)) }} {% endif %} -{% endmacro %} +{%- endmacro %} {# redshift should use default instead of postgres #} -{% macro redshift__datediff(first_date, second_date, datepart) %} +{% macro redshift__datediff(first_date, second_date, datepart) -%} {{ return(dbt_utils.default__datediff(first_date, second_date, datepart)) }} -{% endmacro %} +{%- endmacro %} From 1a517d222b0fa9dd2baf314edf60794aa13ef426 Mon Sep 17 00:00:00 2001 From: Grace Goheen <53586774+graciegoheen@users.noreply.github.com> Date: Wed, 6 Apr 2022 17:39:28 -0400 Subject: [PATCH 22/25] Feature/add listagg macro (#530) * Update README.md * Mutually excl range examples in disclosure triangle * Fix union_relations error when no include/exclude provided * Fix union_relations error when no include/exclude provided (#509) * Update CHANGELOG.md * Add to_condition to relationships where * very minor nit - update "an new" to "a new" (#519) * add quoting to split_part (#528) * add quoting to split_part * update docs for split_part * typo * corrected readme syntax * revert and update to just documentation * add new line * Update README.md * Update README.md * Update README.md Co-authored-by: Joel Labes * add macro to get columns (#516) * add macro to get columns * star macro should use get_columns * add adapter. * swap adapter for dbt_utils Co-authored-by: Joel Labes * update documentation * add output_lower arg * update name to get_filtered_columns_in_relation from get_columns * add tests * forgot args * too much whitespace removal ----------- Actual: ----------- --->"field_3"as "test_field_3"<--- ----------- Expected: ----------- --->"field_3" as "test_field_3"<--- * didnt mean to move a file that i did not create. moving things back. 
* remove lowercase logic * limit_zero Co-authored-by: Joel Labes * Add listagg macro and integration test * remove type in listagg macro * updated integration test * Add redshift to listagg macro * remove redshift listagg * explicitly named group by column * updated default values * Updated example to use correct double vs. single quotes * whitespace control * Added redshift specific macro * Remove documentation * Update integration test so less likely to accidentally work Co-authored-by: Joel Labes * default everything but measure to none * added limit functionality for other dbs * syntax bug for postgres * update redshift macro * fixed block def control * Fixed bug in redshift * Bug fix redshift * remove unused group_by arg * Added additional test without order by col * updated to regex replace * typo * added more integration_tests * attempt to make redshift less complicated * typo * update redshift * replace to substr * More explicit versions with added complexity * handle special characters Co-authored-by: Joel Labes Co-authored-by: Jamie Rosenberg Co-authored-by: Pat Kearns --- CHANGELOG.md | 1 + README.md | 61 ++++++++-- .../data/cross_db/data_listagg.csv | 10 ++ .../data/cross_db/data_listagg_output.csv | 10 ++ .../sql/data_filtered_columns_in_relation.csv | 4 + ..._filtered_columns_in_relation_expected.csv | 2 + .../macros/assert_equal_values.sql | 32 ++++++ .../models/cross_db_utils/schema.yml | 6 + .../models/cross_db_utils/test_listagg.sql | 69 ++++++++++++ integration_tests/models/sql/schema.yml | 10 ++ .../test_get_filtered_columns_in_relation.sql | 16 +++ .../models/sql/test_star_uppercase.sql | 13 +++ macros/cross_db_utils/listagg.sql | 104 ++++++++++++++++++ .../sql/get_filtered_columns_in_relation.sql | 25 +++++ macros/sql/star.sql | 15 +-- 15 files changed, 358 insertions(+), 20 deletions(-) create mode 100644 integration_tests/data/cross_db/data_listagg.csv create mode 100644 integration_tests/data/cross_db/data_listagg_output.csv create mode 100644 integration_tests/data/sql/data_filtered_columns_in_relation.csv create mode 100644 integration_tests/data/sql/data_filtered_columns_in_relation_expected.csv create mode 100644 integration_tests/macros/assert_equal_values.sql create mode 100644 integration_tests/models/cross_db_utils/test_listagg.sql create mode 100644 integration_tests/models/sql/test_get_filtered_columns_in_relation.sql create mode 100644 integration_tests/models/sql/test_star_uppercase.sql create mode 100644 macros/cross_db_utils/listagg.sql create mode 100644 macros/sql/get_filtered_columns_in_relation.sql diff --git a/CHANGELOG.md b/CHANGELOG.md index da3399cb..8700864b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,4 @@ + # dbt-utils v0.8.3 ## New features - A macro for deduplicating data ([#335](https://github.com/dbt-labs/dbt-utils/issues/335), [#512](https://github.com/dbt-labs/dbt-utils/pull/512)) diff --git a/README.md b/README.md index 581f63ba..58ba9e23 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ For compatibility details between versions of dbt-core and dbt-utils, [see this - [Introspective macros](#introspective-macros): - [get_column_values](#get_column_values-source) + - [get_filtered_columns_in_relation](#get_filtered_columns_in_relation-source) - [get_relations_by_pattern](#get_relations_by_pattern-source) - [get_relations_by_prefix](#get_relations_by_prefix-source) - [get_query_results_as_dict](#get_query_results_as_dict-source) @@ -59,6 +60,7 @@ For compatibility details between versions of dbt-core and dbt-utils, 
[see this - [split_part](#split_part-source) - [last_day](#last_day-source) - [width_bucket](#width_bucket-source) + - [listagg](#listagg) - [Jinja Helpers](#jinja-helpers) - [pretty_time](#pretty_time-source) @@ -69,11 +71,11 @@ For compatibility details between versions of dbt-core and dbt-utils, [see this - [insert_by_period](#insert_by_period-source) ---- -======= ### Generic Tests #### equal_rowcount ([source](macros/generic_tests/equal_rowcount.sql)) Asserts that two relations have the same number of rows. + **Usage:** ```yaml version: 2 @@ -387,7 +389,6 @@ models: ```
Additional `gaps` and `zero_length_range_allowed` examples - **Understanding the `gaps` argument:** Here are a number of examples for each allowed `gaps` argument. @@ -435,7 +436,6 @@ models: | 0 | 1 | | 2 | 2 | | 3 | 4 | -
#### sequential_values ([source](macros/generic_tests/sequential_values.sql)) @@ -551,7 +551,7 @@ These macros run a query and return the results of the query as objects. They ar #### get_column_values ([source](macros/sql/get_column_values.sql)) This macro returns the unique values for a column in a given [relation](https://docs.getdbt.com/docs/writing-code-in-dbt/class-reference/#relation) as an array. -Arguments: +**Args:** - `table` (required): a [Relation](https://docs.getdbt.com/reference/dbt-classes#relation) (a `ref` or `source`) that contains the list of columns you wish to select from - `column` (required): The name of the column you wish to find the column values of - `order_by` (optional, default=`'count(*) desc'`): How the results should be ordered. The default is to order by `count(*) desc`, i.e. decreasing frequency. Setting this as `'my_column'` will sort alphabetically, while `'min(created_at)'` will sort by when thevalue was first observed. @@ -592,6 +592,28 @@ Arguments: ... ``` +#### get_filtered_columns_in_relation ([source](macros/sql/get_filtered_columns_in_relation.sql)) +This macro returns an iterable Jinja list of columns for a given [relation](https://docs.getdbt.com/docs/writing-code-in-dbt/class-reference/#relation), (i.e. not from a CTE) +- optionally exclude columns +- the input values are not case-sensitive (input uppercase or lowercase and it will work!) +> Note: The native [`adapter.get_columns_in_relation` macro](https://docs.getdbt.com/reference/dbt-jinja-functions/adapter#get_columns_in_relation) allows you +to pull column names in a non-filtered fashion, also bringing along with it other (potentially unwanted) information, such as dtype, char_size, numeric_precision, etc. + +**Args:** +- `from` (required): a [Relation](https://docs.getdbt.com/reference/dbt-classes#relation) (a `ref` or `source`) that contains the list of columns you wish to select from +- `except` (optional, default=`[]`): The name of the columns you wish to exclude. (case-insensitive) + +**Usage:** +```sql +-- Returns a list of the columns from a relation, so you can then iterate in a for loop +{% set column_names = dbt_utils.get_filtered_columns_in_relation(from=ref('your_model'), except=["field_1", "field_2"]) %} +... +{% for column_name in column_names %} + max({{ column_name }}) ... as max_'{{ column_name }}', +{% endfor %} +... +``` + #### get_relations_by_pattern ([source](macros/sql/get_relations_by_pattern.sql)) Returns a list of [Relations](https://docs.getdbt.com/docs/writing-code-in-dbt/class-reference/#relation) that match a given schema- or table-name pattern. @@ -770,9 +792,20 @@ group by 1,2,3 ``` #### star ([source](macros/sql/star.sql)) -This macro generates a comma-separated list of all fields that exist in the `from` relation, excluding any fields listed in the `except` argument. The construction is identical to `select * from {{ref('my_model')}}`, replacing star (`*`) with the star macro. This macro also has an optional `relation_alias` argument that will prefix all generated fields with an alias (`relation_alias`.`field_name`). +This macro generates a comma-separated list of all fields that exist in the `from` relation, excluding any fields +listed in the `except` argument. The construction is identical to `select * from {{ref('my_model')}}`, replacing star (`*`) with +the star macro. +This macro also has an optional `relation_alias` argument that will prefix all generated fields with an alias (`relation_alias`.`field_name`). 
+The macro also has optional `prefix` and `suffix` arguments. When one or both are provided, they will be concatenated onto each field's alias +in the output (`prefix` ~ `field_name` ~ `suffix`). NB: This prevents the output from being used in any context other than a select statement. + -The macro also has optional `prefix` and `suffix` arguments. When one or both are provided, they will be concatenated onto each field's alias in the output (`prefix` ~ `field_name` ~ `suffix`). NB: This prevents the output from being used in any context other than a select statement. +**Args:** +- `from` (required): a [Relation](https://docs.getdbt.com/reference/dbt-classes#relation) (a `ref` or `source`) that contains the list of columns you wish to select from +- `except` (optional, default=`[]`): The name of the columns you wish to exclude. (case-insensitive) +- `relation_alias` (optional, default=`''`): will prefix all generated fields with an alias (`relation_alias`.`field_name`). +- `prefix` (optional, default=`''`): will prefix the output `field_name` (`field_name as prefix_field_name`). +- `suffix` (optional, default=`''`): will suffix the output `field_name` (`field_name as field_name_suffix`). **Usage:** ```sql @@ -789,6 +822,13 @@ from {{ ref('my_model') }} ``` +```sql +select +{{ dbt_utils.star(from=ref('my_model'), except=["exclude_field_1", "exclude_field_2"], prefix="max_") }} +from {{ ref('my_model') }} + +``` + #### union_relations ([source](macros/sql/union.sql)) This macro unions together an array of [Relations](https://docs.getdbt.com/docs/writing-code-in-dbt/class-reference/#relation), @@ -987,9 +1027,16 @@ This macro calculates the difference between two dates. #### split_part ([source](macros/cross_db_utils/split_part.sql)) This macro splits a string of text using the supplied delimiter and returns the supplied part number (1-indexed). +**Args**: +- `string_text` (required): Text to be split into parts. +- `delimiter_text` (required): Text representing the delimiter to split by. +- `part_number` (required): Requested part of the split (1-based). If the value is negative, the parts are counted backward from the end of the string. + **Usage:** +When referencing a column, use one pair of quotes. When referencing a string, use single quotes enclosed in double quotes. 
``` -{{ dbt_utils.split_part(string_text='1,2,3', delimiter_text=',', part_number=1) }} +{{ dbt_utils.split_part(string_text='column_to_split', delimiter_text='delimiter_column', part_number=1) }} +{{ dbt_utils.split_part(string_text="'1|2|3'", delimiter_text="'|'", part_number=1) }} ``` #### date_trunc ([source](macros/cross_db_utils/date_trunc.sql)) diff --git a/integration_tests/data/cross_db/data_listagg.csv b/integration_tests/data/cross_db/data_listagg.csv new file mode 100644 index 00000000..ee5083ba --- /dev/null +++ b/integration_tests/data/cross_db/data_listagg.csv @@ -0,0 +1,10 @@ +group_col,string_text,order_col +1,a,1 +1,b,2 +1,c,3 +2,a,2 +2,1,1 +2,p,3 +3,g,1 +3,g,2 +3,g,3 \ No newline at end of file diff --git a/integration_tests/data/cross_db/data_listagg_output.csv b/integration_tests/data/cross_db/data_listagg_output.csv new file mode 100644 index 00000000..a7e1c6c4 --- /dev/null +++ b/integration_tests/data/cross_db/data_listagg_output.csv @@ -0,0 +1,10 @@ +group_col,expected,version +1,"a_|_b_|_c",bottom_ordered +2,"1_|_a_|_p",bottom_ordered +3,"g_|_g_|_g",bottom_ordered +1,"a_|_b",bottom_ordered_limited +2,"1_|_a",bottom_ordered_limited +3,"g_|_g",bottom_ordered_limited +3,"g, g, g",comma_whitespace_unordered +3,"g",distinct_comma +3,"g,g,g",no_params \ No newline at end of file diff --git a/integration_tests/data/sql/data_filtered_columns_in_relation.csv b/integration_tests/data/sql/data_filtered_columns_in_relation.csv new file mode 100644 index 00000000..9d96143b --- /dev/null +++ b/integration_tests/data/sql/data_filtered_columns_in_relation.csv @@ -0,0 +1,4 @@ +field_1,field_2,field_3 +a,b,c +d,e,f +g,h,i \ No newline at end of file diff --git a/integration_tests/data/sql/data_filtered_columns_in_relation_expected.csv b/integration_tests/data/sql/data_filtered_columns_in_relation_expected.csv new file mode 100644 index 00000000..77ea0563 --- /dev/null +++ b/integration_tests/data/sql/data_filtered_columns_in_relation_expected.csv @@ -0,0 +1,2 @@ +field_2,field_3 +h,i \ No newline at end of file diff --git a/integration_tests/macros/assert_equal_values.sql b/integration_tests/macros/assert_equal_values.sql new file mode 100644 index 00000000..d4f02618 --- /dev/null +++ b/integration_tests/macros/assert_equal_values.sql @@ -0,0 +1,32 @@ +{% macro assert_equal_values(actual_object, expected_object) %} +{% if not execute %} + + {# pass #} + +{% elif actual_object != expected_object %} + + {% set msg %} + Expected did not match actual + + ----------- + Actual: + ----------- + --->{{ actual_object }}<--- + + ----------- + Expected: + ----------- + --->{{ expected_object }}<--- + + {% endset %} + + {{ log(msg, info=True) }} + + select 'fail' + +{% else %} + + select 'ok' {{ limit_zero() }} + +{% endif %} +{% endmacro %} \ No newline at end of file diff --git a/integration_tests/models/cross_db_utils/schema.yml b/integration_tests/models/cross_db_utils/schema.yml index dbe7a8f4..e1473c9f 100644 --- a/integration_tests/models/cross_db_utils/schema.yml +++ b/integration_tests/models/cross_db_utils/schema.yml @@ -58,6 +58,12 @@ models: - assert_equal: actual: actual expected: expected + + - name: test_listagg + tests: + - assert_equal: + actual: actual + expected: expected - name: test_safe_cast tests: diff --git a/integration_tests/models/cross_db_utils/test_listagg.sql b/integration_tests/models/cross_db_utils/test_listagg.sql new file mode 100644 index 00000000..006948de --- /dev/null +++ b/integration_tests/models/cross_db_utils/test_listagg.sql @@ -0,0 +1,69 @@ +with 
data as ( + + select * from {{ ref('data_listagg') }} + +), + +data_output as ( + + select * from {{ ref('data_listagg_output') }} + +), + +calculate as ( + + select + group_col, + {{ dbt_utils.listagg('string_text', "'_|_'", "order by order_col") }} as actual, + 'bottom_ordered' as version + from data + group by group_col + + union all + + select + group_col, + {{ dbt_utils.listagg('string_text', "'_|_'", "order by order_col", 2) }} as actual, + 'bottom_ordered_limited' as version + from data + group by group_col + + union all + + select + group_col, + {{ dbt_utils.listagg('string_text', "', '") }} as actual, + 'comma_whitespace_unordered' as version + from data + where group_col = 3 + group by group_col + + union all + + select + group_col, + {{ dbt_utils.listagg('DISTINCT string_text', "','") }} as actual, + 'distinct_comma' as version + from data + where group_col = 3 + group by group_col + + union all + + select + group_col, + {{ dbt_utils.listagg('string_text') }} as actual, + 'no_params' as version + from data + where group_col = 3 + group by group_col + +) + +select + calculate.actual, + data_output.expected +from calculate +left join data_output +on calculate.group_col = data_output.group_col +and calculate.version = data_output.version \ No newline at end of file diff --git a/integration_tests/models/sql/schema.yml b/integration_tests/models/sql/schema.yml index ad990260..a78e5e1b 100644 --- a/integration_tests/models/sql/schema.yml +++ b/integration_tests/models/sql/schema.yml @@ -50,6 +50,11 @@ models: values: - '5' + - name: test_get_filtered_columns_in_relation + tests: + - dbt_utils.equality: + compare_model: ref('data_filtered_columns_in_relation_expected') + - name: test_get_relations_by_prefix_and_union columns: - name: event @@ -121,6 +126,11 @@ models: - dbt_utils.equality: compare_model: ref('data_star_aggregate_expected') + - name: test_star_uppercase + tests: + - dbt_utils.equality: + compare_model: ref('data_star_expected') + - name: test_surrogate_key tests: - assert_equal: diff --git a/integration_tests/models/sql/test_get_filtered_columns_in_relation.sql b/integration_tests/models/sql/test_get_filtered_columns_in_relation.sql new file mode 100644 index 00000000..7b3ca72f --- /dev/null +++ b/integration_tests/models/sql/test_get_filtered_columns_in_relation.sql @@ -0,0 +1,16 @@ +{% set exclude_field = 'field_1' %} +{% set column_names = dbt_utils.get_filtered_columns_in_relation(from= ref('data_filtered_columns_in_relation'), except=[exclude_field]) %} + +with data as ( + + select + + {% for column_name in column_names %} + max({{ column_name }}) as {{ column_name }} {% if not loop.last %},{% endif %} + {% endfor %} + + from {{ ref('data_filtered_columns_in_relation') }} + +) + +select * from data diff --git a/integration_tests/models/sql/test_star_uppercase.sql b/integration_tests/models/sql/test_star_uppercase.sql new file mode 100644 index 00000000..6179e691 --- /dev/null +++ b/integration_tests/models/sql/test_star_uppercase.sql @@ -0,0 +1,13 @@ +{% set exclude_field = 'FIELD_3' %} + + +with data as ( + + select + {{ dbt_utils.star(from=ref('data_star'), except=[exclude_field]) }} + + from {{ ref('data_star') }} + +) + +select * from data diff --git a/macros/cross_db_utils/listagg.sql b/macros/cross_db_utils/listagg.sql new file mode 100644 index 00000000..1d19a54f --- /dev/null +++ b/macros/cross_db_utils/listagg.sql @@ -0,0 +1,104 @@ +{% macro listagg(measure, delimiter_text="','", order_by_clause=none, limit_num=none) -%} + {{ 
return(adapter.dispatch('listagg', 'dbt_utils') (measure, delimiter_text, order_by_clause, limit_num)) }} +{%- endmacro %} + +{% macro default__listagg(measure, delimiter_text, order_by_clause, limit_num) -%} + + {% if limit_num -%} + array_to_string( + array_slice( + array_agg( + {{ measure }} + ){% if order_by_clause -%} + within group ({{ order_by_clause }}) + {%- endif %} + ,0 + ,{{ limit_num }} + ), + {{ delimiter_text }} + ) + {%- else %} + listagg( + {{ measure }}, + {{ delimiter_text }} + ) + {% if order_by_clause -%} + within group ({{ order_by_clause }}) + {%- endif %} + {%- endif %} + +{%- endmacro %} + +{% macro bigquery__listagg(measure, delimiter_text, order_by_clause, limit_num) -%} + + string_agg( + {{ measure }}, + {{ delimiter_text }} + {% if order_by_clause -%} + {{ order_by_clause }} + {%- endif %} + {% if limit_num -%} + limit {{ limit_num }} + {%- endif %} + ) + +{%- endmacro %} + +{% macro postgres__listagg(measure, delimiter_text, order_by_clause, limit_num) -%} + + {% if limit_num -%} + array_to_string( + (array_agg( + {{ measure }} + {% if order_by_clause -%} + {{ order_by_clause }} + {%- endif %} + ))[1:{{ limit_num }}], + {{ delimiter_text }} + ) + {%- else %} + string_agg( + {{ measure }}, + {{ delimiter_text }} + {% if order_by_clause -%} + {{ order_by_clause }} + {%- endif %} + ) + {%- endif %} + +{%- endmacro %} + +{# if there are instances of delimiter_text within your measure, you cannot include a limit_num #} +{% macro redshift__listagg(measure, delimiter_text, order_by_clause, limit_num) -%} + + {% if limit_num -%} + {% set ns = namespace() %} + {% set ns.delimiter_text_regex = delimiter_text|trim("'") %} + {% set special_chars %}\,^,$,.,|,?,*,+,(,),[,],{,}{% endset %} + {%- for char in special_chars.split(',') -%} + {% set escape_char %}\\{{ char }}{% endset %} + {% set ns.delimiter_text_regex = ns.delimiter_text_regex|replace(char,escape_char) %} + {%- endfor -%} + + {% set regex %}'([^{{ ns.delimiter_text_regex }}]+{{ ns.delimiter_text_regex }}){1,{{ limit_num - 1}}}[^{{ ns.delimiter_text_regex }}]+'{% endset %} + regexp_substr( + listagg( + {{ measure }}, + {{ delimiter_text }} + ) + {% if order_by_clause -%} + within group ({{ order_by_clause }}) + {%- endif %} + ,{{ regex }} + ) + {%- else %} + listagg( + {{ measure }}, + {{ delimiter_text }} + ) + {% if order_by_clause -%} + within group ({{ order_by_clause }}) + {%- endif %} + {%- endif %} + +{%- endmacro %} \ No newline at end of file diff --git a/macros/sql/get_filtered_columns_in_relation.sql b/macros/sql/get_filtered_columns_in_relation.sql new file mode 100644 index 00000000..7f4af889 --- /dev/null +++ b/macros/sql/get_filtered_columns_in_relation.sql @@ -0,0 +1,25 @@ +{% macro get_filtered_columns_in_relation(from, except=[]) -%} + {{ return(adapter.dispatch('get_filtered_columns_in_relation', 'dbt_utils')(from, except)) }} +{% endmacro %} + +{% macro default__get_filtered_columns_in_relation(from, except=[]) -%} + {%- do dbt_utils._is_relation(from, 'get_filtered_columns_in_relation') -%} + {%- do dbt_utils._is_ephemeral(from, 'get_filtered_columns_in_relation') -%} + + {# -- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. 
#} + {%- if not execute -%} + {{ return('') }} + {% endif %} + + {%- set include_cols = [] %} + {%- set cols = adapter.get_columns_in_relation(from) -%} + {%- set except = except | map("lower") | list %} + {%- for col in cols -%} + {%- if col.column|lower not in except -%} + {% do include_cols.append(col.column) %} + {%- endif %} + {%- endfor %} + + {{ return(include_cols) }} + +{%- endmacro %} \ No newline at end of file diff --git a/macros/sql/star.sql b/macros/sql/star.sql index 0bfa8c9a..72778b1f 100644 --- a/macros/sql/star.sql +++ b/macros/sql/star.sql @@ -11,20 +11,9 @@ {{ return('') }} {% endif %} - {%- set include_cols = [] %} - {%- set cols = adapter.get_columns_in_relation(from) -%} - {%- set except = except | map("lower") | list %} - {%- for col in cols -%} + {%- for col in dbt_utils.get_filtered_columns_in_relation(from, except) %} - {%- if col.column|lower not in except -%} - {% do include_cols.append(col.column) %} - - {%- endif %} - {%- endfor %} - - {%- for col in include_cols %} - - {%- if relation_alias %}{{ relation_alias }}.{% else %}{%- endif -%}{{ adapter.quote(col)|trim }} {%- if prefix!='' or suffix!='' -%} as {{ adapter.quote(prefix ~ col ~ suffix)|trim }} {%- endif -%} + {%- if relation_alias %}{{ relation_alias }}.{% else %}{%- endif -%}{{ adapter.quote(col)|trim }} {%- if prefix!='' or suffix!='' %} as {{ adapter.quote(prefix ~ col ~ suffix)|trim }} {%- endif -%} {%- if not loop.last %},{{ '\n ' }}{% endif %} {%- endfor -%} From 947f6c0893f7ffc859659cb0053ff07c81b85396 Mon Sep 17 00:00:00 2001 From: James McNeill <55981540+jpmmcneill@users.noreply.github.com> Date: Wed, 6 Apr 2022 23:09:37 +0100 Subject: [PATCH 23/25] patch default behaviour in get_column_values (#533) --- macros/sql/get_column_values.sql | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/macros/sql/get_column_values.sql b/macros/sql/get_column_values.sql index 2a7c62ee..f70890e2 100644 --- a/macros/sql/get_column_values.sql +++ b/macros/sql/get_column_values.sql @@ -3,11 +3,9 @@ {% endmacro %} {% macro default__get_column_values(table, column, order_by='count(*) desc', max_records=none, default=none) -%} -{% if default is none %} - {% set default = [] %} -{% endif %} {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #} {%- if not execute -%} + {% set default = [] if not default %} {{ return(default) }} {% endif %} From 72a5150f7107b1d24905201a7c2d1e3d78c6418e Mon Sep 17 00:00:00 2001 From: Joel Labes Date: Fri, 8 Apr 2022 10:14:57 +1200 Subject: [PATCH 24/25] Update changelog, add missing quotes around get_table_types_sql --- CHANGELOG.md | 27 ++++++++++++++++++++++----- macros/sql/get_table_types_sql.sql | 2 +- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8700864b..01bb796b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,30 @@ # dbt-utils v0.8.3 ## New features -- A macro for deduplicating data ([#335](https://github.com/dbt-labs/dbt-utils/issues/335), [#512](https://github.com/dbt-labs/dbt-utils/pull/512)) +- A macro for deduplicating data, `deduplicate()` ([#335](https://github.com/dbt-labs/dbt-utils/issues/335), [#512](https://github.com/dbt-labs/dbt-utils/pull/512)) +- A cross-database implementation of `listagg()` ([#530](https://github.com/dbt-labs/dbt-utils/pull/530)) +- A new macro to get the columns in a relation as a list, `get_filtered_columns_in_relation()`. 
This is similar to the `star()` macro, but creates a Jinja list instead of a comma-separated string. ([#516](https://github.com/dbt-labs/dbt-utils/pull/516)) + +## Fixes +- `get_column_values()` once more raises an error when the model doesn't exist and there is no default provided ([#531](https://github.com/dbt-labs/dbt-utils/issues/531), [#533](https://github.com/dbt-labs/dbt-utils/pull/533)) +- `get_column_values()` raises an error when used with an ephemeral model, instead of getting stuck in a compilation loop ([#358](https://github.com/dbt-labs/dbt-utils/issues/358), [#518](https://github.com/dbt-labs/dbt-utils/pull/518)) +- BigQuery materialized views work correctly with `get_relations_by_pattern()` ([#525](https://github.com/dbt-labs/dbt-utils/pull/525)) ## Quality of life -- Updated references to 'schema test' in project file structure and documentation referred to in [#485](https://github.com/dbt-labs/dbt-utils/issues/485) +- Updated references to 'schema test' in project file structure and documentation ([#485](https://github.com/dbt-labs/dbt-utils/issues/485), [#521](https://github.com/dbt-labs/dbt-utils/pull/521)) +- `date_trunc()` and `datediff()` default macros now have whitespace control to assist with linting and readability [#529](https://github.com/dbt-labs/dbt-utils/pull/529) +- `star()` no longer raises an error during SQLFluff linting ([#506](https://github.com/dbt-labs/dbt-utils/issues/506), [#532](https://github.com/dbt-labs/dbt-utils/pull/532)) + +## Contributors: +- [@judahrand](https://github.com/judahrand) (#512) +- [@b-moynihan](https://github.com/b-moynihan) (#521) +- [@sunriselong](https://github.com/sunriselong) (#529) +- [@jpmmcneill](https://github.com/jpmmcneill) (#533) +- [@KamranAMalik](https://github.com/KamranAMalik) (#532) +- [@graciegoheen](https://github.com/graciegoheen) (#530) +- [@luisleon90](https://github.com/luisleon90) (#525) +- [@epapineau](https://github.com/epapineau) (#518) +- [@patkearns10](https://github.com/patkearns10) (#516) # dbt-utils v0.8.2 ## Fixes @@ -26,7 +46,6 @@ - `star()` will only alias columns if a prefix/suffix is provided, to allow the unmodified output to still be used in `group by` clauses etc. 
[#468](https://github.com/dbt-labs/dbt-utils/pull/468) - The `sequential_values` test is now compatible with quoted columns [#479](https://github.com/dbt-labs/dbt-utils/pull/479) - `pivot()` escapes values containing apostrophes [#503](https://github.com/dbt-labs/dbt-utils/pull/503) -- `date_trunc` and `datediff` default macros now have whitespace control to assist with linting and readability [#529](https://github.com/dbt-labs/dbt-utils/pull/529) ## Contributors: - [grahamwetzler](https://github.com/grahamwetzler) (#473) @@ -36,8 +55,6 @@ - [jelstongreen](https://github.com/jelstongreen) (#468) - [armandduijn](https://github.com/armandduijn) (#479) - [mdutoo](https://github.com/mdutoo) (#503) -- [sunriselong](https://github.com/sunriselong) (#529) - # dbt-utils v0.8.0 ## 🚨 Breaking changes diff --git a/macros/sql/get_table_types_sql.sql b/macros/sql/get_table_types_sql.sql index c16275e9..e3f86884 100644 --- a/macros/sql/get_table_types_sql.sql +++ b/macros/sql/get_table_types_sql.sql @@ -8,7 +8,7 @@ when 'EXTERNAL TABLE' then 'external' when 'MATERIALIZED VIEW' then 'materializedview' else lower(table_type) - end as table_type + end as "table_type" {% endmacro %} From ab1cb1190efe783abcd530e17e81bddbbc6dfd05 Mon Sep 17 00:00:00 2001 From: Joel Labes Date: Fri, 8 Apr 2022 10:39:40 +1200 Subject: [PATCH 25/25] rm whitespace --- macros/sql/get_relations_by_pattern.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/sql/get_relations_by_pattern.sql b/macros/sql/get_relations_by_pattern.sql index aed8bd60..9325a883 100644 --- a/macros/sql/get_relations_by_pattern.sql +++ b/macros/sql/get_relations_by_pattern.sql @@ -19,7 +19,7 @@ database=database, schema=row.table_schema, identifier=row.table_name, - type = row.table_type + type=row.table_type ) -%} {%- do tbl_relations.append(tbl_relation) -%} {%- endfor -%}
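For reviewers who want to see the new macros from this patch series in context, the sketch below shows one way `listagg()` and `get_filtered_columns_in_relation()` might be combined in a downstream model. It is illustrative only and not part of any patch above: the `orders` relation and its `customer_id`, `status`, `order_date`, and `amount` columns are assumptions, and the argument values simply mirror the signatures introduced in `macros/cross_db_utils/listagg.sql` and `macros/sql/get_filtered_columns_in_relation.sql` and the integration test `test_get_filtered_columns_in_relation.sql`.

```
{#- Illustrative sketch only: assumes a relation ref('orders') with columns
    customer_id, status, order_date, and amount. -#}

{%- set column_names = dbt_utils.get_filtered_columns_in_relation(
        from=ref('orders'),
        except=['customer_id', 'status']
    ) -%}

select
    customer_id,

    {# pipe-delimited status history per customer, ordered by order_date and capped at 5 values #}
    {{ dbt_utils.listagg('status', "'|'", 'order by order_date', 5) }} as status_history,

    {# one max() per remaining column, mirroring the integration test added in this PR #}
    {% for column_name in column_names %}
    max({{ column_name }}) as max_{{ column_name }}{% if not loop.last %},{% endif %}
    {% endfor %}

from {{ ref('orders') }}
group by customer_id
```

Note that, per the comment added at the top of `redshift__listagg`, combining a `limit_num` with a measure that itself contains the delimiter text is not supported on Redshift.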
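The `get_column_values()` change in patch 23 (#533) is easiest to see with a small sketch. Again this is illustrative only: the `stg_payments` model and its `payment_method` and `amount` columns are assumptions.

```
{#- Illustrative sketch only: assumes a model stg_payments with columns
    payment_method and amount. -#}

{#- Per the changelog entry above, if the underlying relation does not exist and
    no default is provided, the macro raises an error; supplying a default
    returns that value instead. -#}
{%- set payment_methods = dbt_utils.get_column_values(
        table=ref('stg_payments'),
        column='payment_method',
        default=['credit_card']
    ) -%}

select
    {% for method in payment_methods %}
    sum(case when payment_method = '{{ method }}' then amount end) as {{ method }}_amount
    {%- if not loop.last %},{% endif %}
    {% endfor %}
from {{ ref('stg_payments') }}
```

In the diff itself, the `default = []` fallback now applies only inside the `not execute` branch, so parsing is unaffected while a missing relation with no explicit `default` can surface an error at execution time instead of silently returning an empty list.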