From d04d6b3bfa4b0e0800e95ecf9c49f4140f07bb5b Mon Sep 17 00:00:00 2001 From: Benjamin Ryon Date: Sat, 19 Dec 2020 00:28:22 -0800 Subject: [PATCH] Add zero_length_range_allowed arg to mutually_exclusive_ranges (#307) Use boolean for zero range arg. Update changelog --- CHANGELOG.md | 2 ++ README.md | 29 +++++++++++++++++++ ...exclusive_ranges_with_gaps_zero_length.csv | 7 +++++ .../models/schema_tests/schema.yml | 8 +++++ .../mutually_exclusive_ranges.sql | 25 +++++++++++----- 5 files changed, 63 insertions(+), 8 deletions(-) create mode 100644 integration_tests/data/schema_tests/data_test_mutually_exclusive_ranges_with_gaps_zero_length.csv diff --git a/CHANGELOG.md b/CHANGELOG.md index 45aa9316644..332f65000d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,8 @@ * Add new `accepted_range` test ([#276](https://github.com/fishtown-analytics/dbt-utils/pull/276) [@joellabes](https://github.com/joellabes)) * Make `expression_is_true` work as a column test (code originally in [#226](https://github.com/fishtown-analytics/dbt-utils/pull/226/) from [@elliottohara](https://github.com/elliottohara), merged via [#313]) * Add new schema test, `not_accepted_values` ([#284](https://github.com/fishtown-analytics/dbt-utils/pull/284) [@JavierMonton](https://github.com/JavierMonton)) +* Support a new argument, `zero_length_range_allowed` in the `mutually_exclusive_ranges` test ([#307](https://github.com/fishtown-analytics/dbt-utils/pull/307) [@zemekeneng](https://github.com/zemekeneng)) + ## Fixes * Handle booleans gracefully in the unpivot macro ([#305](https://github.com/fishtown-analytics/dbt-utils/pull/305) [@avishalom](https://github.com/avishalom)) diff --git a/README.md b/README.md index 6fac36cfda4..9c04c8215d0 100644 --- a/README.md +++ b/README.md @@ -361,6 +361,15 @@ models: upper_bound_column: ended_at partition_by: customer_id gaps: required + + # test that each customer can have subscriptions that start and end on the same date + - name: subscriptions + tests: + - 
dbt_utils.mutually_exclusive_ranges: + lower_bound_column: started_at + upper_bound_column: ended_at + partition_by: customer_id + zero_length_range_allowed: true ``` **Args:** * `lower_bound_column` (required): The name of the column that represents the @@ -372,6 +381,8 @@ upper value of the range. Must be not null. argument to indicate which column to partition by. `default=none` * `gaps` (optional): Whether there can be gaps are allowed between ranges. `default='allowed', one_of=['not_allowed', 'allowed', 'required']` +* `zero_length_range_allowed` (optional): Whether ranges can start and end on the same date. +`default=False` **Note:** Both `lower_bound_column` and `upper_bound_column` should be not null. If this is not the case in your data source, consider passing a coalesce function @@ -418,6 +429,24 @@ the lower bound of the next record (common for date ranges). | 2 | 3 | | 4 | 5 | +**Understanding the `zero_length_range_allowed` parameter:** +Here are a number of examples for each allowed `zero_length_range_allowed` parameter. +* `zero_length_range_allowed: false`: (default) The upper bound of each record must be greater than its lower bound. + +| lower_bound | upper_bound | +|-------------|-------------| +| 0 | 1 | +| 1 | 2 | +| 2 | 3 | + +* `zero_length_range_allowed: true`: The upper bound of each record can be greater than or equal to its lower bound. + +| lower_bound | upper_bound | +|-------------|-------------| +| 0 | 1 | +| 2 | 2 | +| 3 | 4 | + #### unique_combination_of_columns ([source](macros/schema_tests/unique_combination_of_columns.sql)) This test confirms that the combination of columns is unique. 
For example, the combination of month and product is unique, however neither column is unique diff --git a/integration_tests/data/schema_tests/data_test_mutually_exclusive_ranges_with_gaps_zero_length.csv b/integration_tests/data/schema_tests/data_test_mutually_exclusive_ranges_with_gaps_zero_length.csv new file mode 100644 index 00000000000..29330c8f7e6 --- /dev/null +++ b/integration_tests/data/schema_tests/data_test_mutually_exclusive_ranges_with_gaps_zero_length.csv @@ -0,0 +1,7 @@ +subscription_id,valid_from,valid_to +3,2020-05-06,2020-05-07 +3,2020-05-08,2020-05-08 +3,2020-05-09,2020-05-10 +4,2020-06-06,2020-06-07 +4,2020-06-08,2020-06-08 +4,2020-06-09,2020-06-10 \ No newline at end of file diff --git a/integration_tests/models/schema_tests/schema.yml b/integration_tests/models/schema_tests/schema.yml index 0604be34da6..44d6b296f7d 100644 --- a/integration_tests/models/schema_tests/schema.yml +++ b/integration_tests/models/schema_tests/schema.yml @@ -112,6 +112,14 @@ models: partition_by: subscription_id gaps: required + - name: data_test_mutually_exclusive_ranges_with_gaps_zero_length + tests: + - dbt_utils.mutually_exclusive_ranges: + lower_bound_column: valid_from + upper_bound_column: valid_to + partition_by: subscription_id + zero_length_range_allowed: true + - name: data_unique_combination_of_columns tests: - dbt_utils.unique_combination_of_columns: diff --git a/macros/schema_tests/mutually_exclusive_ranges.sql b/macros/schema_tests/mutually_exclusive_ranges.sql index cd7a33d03ce..278a048ceb7 100644 --- a/macros/schema_tests/mutually_exclusive_ranges.sql +++ b/macros/schema_tests/mutually_exclusive_ranges.sql @@ -1,9 +1,8 @@ -{% macro test_mutually_exclusive_ranges(model, lower_bound_column, upper_bound_column, partition_by=None, gaps='allowed') %} - {{ return(adapter.dispatch('test_mutually_exclusive_ranges', packages = dbt_utils._get_utils_namespaces())(model, lower_bound_column, upper_bound_column, partition_by, gaps)) }} +{% macro 
test_mutually_exclusive_ranges(model, lower_bound_column, upper_bound_column, partition_by=None, gaps='allowed', zero_length_range_allowed=False) %} + {{ return(adapter.dispatch('test_mutually_exclusive_ranges', packages = dbt_utils._get_utils_namespaces())(model, lower_bound_column, upper_bound_column, partition_by, gaps, zero_length_range_allowed)) }} {% endmacro %} -{% macro default__test_mutually_exclusive_ranges(model, lower_bound_column, upper_bound_column, partition_by=None, gaps='allowed') %} - +{% macro default__test_mutually_exclusive_ranges(model, lower_bound_column, upper_bound_column, partition_by=None, gaps='allowed', zero_length_range_allowed=False) %} {% if gaps == 'not_allowed' %} {% set allow_gaps_operator='=' %} {% set allow_gaps_operator_in_words='equal_to' %} @@ -17,7 +16,17 @@ {{ exceptions.raise_compiler_error( "`gaps` argument for mutually_exclusive_ranges test must be one of ['not_allowed', 'allowed', 'required'] Got: '" ~ gaps ~"'.'" ) }} - +{% endif %} +{% if not zero_length_range_allowed %} + {% set allow_zero_length_operator='<' %} + {% set allow_zero_length_operator_in_words='less_than' %} +{% elif zero_length_range_allowed %} + {% set allow_zero_length_operator='<=' %} + {% set allow_zero_length_operator_in_words='less_than_or_equal_to' %} +{% else %} + {{ exceptions.raise_compiler_error( + "`zero_length_range_allowed` argument for mutually_exclusive_ranges test must be one of [true, false] Got: '" ~ zero_length_range_allowed ~"'.'" + ) }} {% endif %} {% set partition_clause="partition by " ~ partition_by if partition_by else '' %} @@ -55,9 +64,9 @@ calc as ( -- Coalesce it to return an error on the null case (implicit assumption -- these columns are not_null) coalesce( - lower_bound < upper_bound, + lower_bound {{ allow_zero_length_operator }} upper_bound, false - ) as lower_bound_less_than_upper_bound, + ) as lower_bound_{{ allow_zero_length_operator_in_words }}_upper_bound, -- For each record: upper_bound {{ allow_gaps_operator }} 
the next lower_bound. -- Coalesce it to handle null cases for the last record. @@ -79,7 +88,7 @@ validation_errors as ( where not( -- THE FOLLOWING SHOULD BE TRUE -- - lower_bound_less_than_upper_bound + lower_bound_{{ allow_zero_length_operator_in_words }}_upper_bound and upper_bound_{{ allow_gaps_operator_in_words }}_next_lower_bound ) )