From c02c009f8c6249866195e851769e7b6116aa8651 Mon Sep 17 00:00:00 2001 From: Claire Carroll Date: Wed, 31 Mar 2021 11:56:24 -0400 Subject: [PATCH] Add order_by argument to get_column_values (#349) --- CHANGELOG.md | 42 ++++++++++++++++--- README.md | 34 +++++++++++++-- .../models/sql/test_get_column_values.sql | 2 +- macros/sql/get_column_values.sql | 24 +++-------- 4 files changed, 73 insertions(+), 29 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e07d4e27..38f11b888 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,45 @@ # dbt-utils v0.7.0 (unreleased) ## Breaking changes + +### get_column_values +The order of (optional) arguments has changed in the `get_column_values` macro. + +Before: +```jinja +{% macro get_column_values(table, column, max_records=none, default=none) -%} +... +{% endmacro %} +``` + +After: +```jinja +{% macro get_column_values(table, column, order_by='count(*) desc', max_records=none, default=none) -%} +... +{% endmacro %} +``` +If you were relying on the position to match up your optional arguments, this may be a breaking change — in general, we recommend that you explicitly declare any optional arguments (if not all of your arguments!) 
+``` +-- before: This works on previous versions of dbt-utils, but on 0.7.0, the `50` would be passed through as the `order_by` argument +{% set payment_methods = dbt_utils.get_column_values( + ref('stg_payments'), + 'payment_method', + 50 +) %} + +-- after +{% set payment_methods = dbt_utils.get_column_values( + ref('stg_payments'), + 'payment_method', + max_records=50 +) %} +``` + ## Features +* Add new argument, `order_by`, to `get_column_values` (code originally in [#289](https://github.com/fishtown-analytics/dbt-utils/pull/289/) from [@clausherther](https://github.com/clausherther), merged via [#349](https://github.com/fishtown-analytics/dbt-utils/pull/349/)) ## Fixes + ## Under the hood # dbt-utils v0.6.5 @@ -11,12 +47,6 @@ * Add new `accepted_range` test ([#276](https://github.com/fishtown-analytics/dbt-utils/pull/276) [@joellabes](https://github.com/joellabes)) * Make `expression_is_true` work as a column test (code originally in [#226](https://github.com/fishtown-analytics/dbt-utils/pull/226/) from [@elliottohara](https://github.com/elliottohara), merged via [#313](https://github.com/fishtown-analytics/dbt-utils/pull/313/)) * Add new schema test, `not_accepted_values` ([#284](https://github.com/fishtown-analytics/dbt-utils/pull/284) [@JavierMonton](https://github.com/JavierMonton)) -* Support a new argument, `zero_length_range_allowed` in the `mutually_exclusive_ranges` test ([#307](https://github.com/fishtown-analytics/dbt-utils/pull/307) [@zemekeneng](https://github.com/zemekeneng)) -* Add new schema test, `sequential_values` ([#318](https://github.com/fishtown-analytics/dbt-utils/pull/318), inspired by [@hundredwatt](https://github.com/hundredwatt)) -* Support `quarter` in the `postgres__last_day` macro ([#333](https://github.com/fishtown-analytics/dbt-utils/pull/333/files) [@seunghanhong](https://github.com/seunghanhong)) -* Add new argument, `unit`, to `haversine_distance` ([#340](https://github.com/fishtown-analytics/dbt-utils/pull/340) 
[@bastienboutonnet](https://github.com/bastienboutonnet)) -* Add new schema test, `fewer_rows_than` (code originally in [#221](https://github.com/fishtown-analytics/dbt-utils/pull/230/) from [@dmarts](https://github.com/dmarts), merged via [#343](https://github.com/fishtown-analytics/dbt-utils/pull/343/)) - ## Fixes * Handle booleans gracefully in the unpivot macro ([#305](https://github.com/fishtown-analytics/dbt-utils/pull/305) [@avishalom](https://github.com/avishalom)) diff --git a/README.md b/README.md index b8a46afbc..23925a70a 100644 --- a/README.md +++ b/README.md @@ -510,8 +510,14 @@ These macros run a query and return the results of the query as objects. They ar #### get_column_values ([source](macros/sql/get_column_values.sql)) -This macro returns the unique values for a column in a given [relation](https://docs.getdbt.com/docs/writing-code-in-dbt/class-reference/#relation). -It takes an options `default` argument for compiling when the relation does not already exist. +This macro returns the unique values for a column in a given [relation](https://docs.getdbt.com/docs/writing-code-in-dbt/class-reference/#relation) as an array. + +Arguments: +- `table` (required): a [Relation](https://docs.getdbt.com/reference/dbt-classes#relation) (a `ref` or `source`) that contains the list of columns you wish to select from +- `column` (required): The name of the column you wish to find the column values of +- `order_by` (optional, default=`'count(*) desc'`): How the results should be ordered. The default is to order by `count(*) desc`, i.e. decreasing frequency. Setting this as `'my_column'` will sort alphabetically, while `'min(created_at)'` will sort by when the value was first observed. +- `max_records` (optional, default=`none`): The maximum number of column values you want to return +- `default` (optional, default=`[]`): The results this macro should return if the relation has not yet been created (and therefore has no column values). 
**Usage:** @@ -519,15 +525,35 @@ It takes an options `default` argument for compiling when the relation does not -- Returns a list of the payment_methods in the stg_payments model_ {% set payment_methods = dbt_utils.get_column_values(table=ref('stg_payments'), column='payment_method') %} -{% for state in states %} +{% for payment_method in payment_methods %} ... {% endfor %} ... ``` -#### get_relations_by_pattern ([source](macros/sql/get_relations_by_pattern.sql)) +```sql +-- Returns the list sorted alphabetically +{% set payment_methods = dbt_utils.get_column_values( + table=ref('stg_payments'), + column='payment_method', + order_by='payment_method' +) %} +``` +```sql +-- Returns the list sorted by most recently observed +{% set payment_methods = dbt_utils.get_column_values( + table=ref('stg_payments'), + column='payment_method', + order_by='max(created_at) desc', + max_records=50, + default=['bank_transfer', 'coupon', 'credit_card'] +) %} +... +``` + +#### get_relations_by_pattern ([source](macros/sql/get_relations_by_pattern.sql)) Returns a list of [Relations](https://docs.getdbt.com/docs/writing-code-in-dbt/class-reference/#relation) that match a given schema- or table-name pattern. 
diff --git a/integration_tests/models/sql/test_get_column_values.sql b/integration_tests/models/sql/test_get_column_values.sql index bfe5c4863..8e6f5b450 100644 --- a/integration_tests/models/sql/test_get_column_values.sql +++ b/integration_tests/models/sql/test_get_column_values.sql @@ -1,5 +1,5 @@ -{% set columns = dbt_utils.get_column_values(ref('data_get_column_values'), 'field', default = []) %} +{% set columns = dbt_utils.get_column_values(ref('data_get_column_values'), 'field', default=[], order_by="field") %} {% if target.type == 'snowflake' %} diff --git a/macros/sql/get_column_values.sql b/macros/sql/get_column_values.sql index 39aa96c9b..254cc9c72 100644 --- a/macros/sql/get_column_values.sql +++ b/macros/sql/get_column_values.sql @@ -1,26 +1,13 @@ -{# -This macro fetches the unique values for `column` in the table `table` - -Arguments: - table: A model `ref`, or a schema.table string for the table to query (Required) - column: The column to query for unique values - max_records: If provided, the maximum number of unique records to return (default: none) - -Returns: - A list of distinct values for the specified columns -#} - -{% macro get_column_values(table, column, max_records=none, default=none) -%} - {{ return(adapter.dispatch('get_column_values', packages = dbt_utils._get_utils_namespaces())(table, column, max_records, default)) }} +{% macro get_column_values(table, column, order_by='count(*) desc', max_records=none, default=none) -%} + {{ return(adapter.dispatch('get_column_values', packages = dbt_utils._get_utils_namespaces())(table, column, order_by, max_records, default)) }} {% endmacro %} -{% macro default__get_column_values(table, column, max_records=none, default=none) -%} +{% macro default__get_column_values(table, column, order_by='count(*) desc', max_records=none, default=none) -%} -{#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #} + {#-- Prevent querying of db in parsing mode. 
This works because this macro does not create any new refs. #} {%- if not execute -%} {{ return('') }} {% endif %} -{#-- #} {%- set target_relation = adapter.get_relation(database=table.database, schema=table.schema, @@ -40,12 +27,13 @@ Returns: {%- else -%} + select {{ column }} as value from {{ target_relation }} group by 1 - order by count(*) desc + order by {{ order_by }} {% if max_records is not none %} limit {{ max_records }}