Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add macro to apply tags to source columns in on-run-end hook #16

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions integration_tests/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,14 @@ on-run-start:
{% if var("fresh", false) %}
create or replace schema {{ dbt_tags.get_resource_ns() }};
{% endif %}
on-run-end:
- >
{% if flags.WHICH in ('run', 'build') %}
{{ dbt_tags.apply_source_column_tags() }}
{% endif %}

vars:
dbt_tags__tag_source_columns: true
dbt_tags__database: dbt_tags
dbt_tags__schema: common
dbt_tags__resource_types: ["model", "snapshot", "source"]
Expand All @@ -34,3 +40,7 @@ models:
{% if flags.WHICH in ('run', 'build') %}
{{ dbt_tags.apply_column_tags() }}
{% endif %}

seeds:
dbt_tags_test:
+schema: raw
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@ depends on:
with dbt_project_tags as (
{% for item in dbt_project_tags if ".column" in item["level"] %}

{% set model_fqn -%}
{{ target.database }}.{{ target.schema }}.{{ item.level.split(".")[1] }}
{%- endset %}

{% set tag_name = item.tag.split(tag_name_separator)[0] %}
{% if not item.tag.split(tag_name_separator)[1] %}
{% set tag_value = item.name %}
Expand All @@ -34,7 +30,7 @@ with dbt_project_tags as (

select lower('{{ tag_name }}') as tag_name,
lower('{{ tag_value }}') as tag_value,
lower('{{ model_fqn }}') as model_name,
lower('{{ item.model_fqn }}') as model_name,
lower('{{ item.name }}') as column_name,

{% if not loop.last %}
Expand All @@ -48,18 +44,14 @@ column_tag_references as (

{% for item in dbt_project_tags if ".column" in item.level %}

{% set model_fqn -%}
{{ target.database }}.{{ target.schema }}.{{ item.level.split(".")[1] }}
{%- endset %}

select lower(tag_name) as tag_name,
lower(tag_value) as tag_value,
lower(object_database || '.' || object_schema || '.' || object_name) as model_name,
lower(column_name) as column_name,

from table(
information_schema.tag_references(
'{{ model_fqn }}.{{ item.name }}', 'COLUMN'
'{{ item.model_fqn }}.{{ item.name }}', 'COLUMN'
)
)

Expand Down
17 changes: 17 additions & 0 deletions integration_tests/models/sources.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
version: 2

sources:
- name: jaffle_shop
database: dbt_tags
schema: raw
tables:
- name: raw_orders
columns:
- name: user_id
tags:
- 'pii_null~user id'
- name: raw_customers
columns:
- name: first_name
tags:
- pii_name
24 changes: 1 addition & 23 deletions macros/apply-tags/apply_column_tags.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,7 @@
{% set tag_ns = dbt_tags.get_resource_ns() %}
{% set tag_name_separator = var('dbt_tags__tag_name_separator','~') %}
{% set query %}

{% for key, value in model.columns.items() -%}
{% if value.tags is defined -%}
{% for column_tag in value.tags if dbt_tags.is_allowed_tags(column_tag.split(tag_name_separator)[0]) %}

{%- set relation -%}
{{ model.database }}.{{ model.schema }}.{{ model.alias or model.name }}
{%- endset %}

alter table {{ relation }}
alter column {{ key }}
set tag {{ tag_ns }}.{{ column_tag.split(tag_name_separator)[0] }} =
{%- if tag_name_separator in column_tag -%}
'{{ column_tag.split(tag_name_separator)[1] }}';
{%- else -%}
'{{ key }}';
{%- endif %}
{{- log("dbt_tags.apply_column_tags - Set tag [" ~ tag_ns ~ "." ~ column_tag ~ "] on column [" ~ relation ~ ":" ~ key ~ "]", info=True) -}}

{%- endfor %}
{%- endif %}
{%- endfor %}

{{dbt_tags.apply_column_tags_query(model)}}
{% endset %}

{{ return(query) }}
Expand Down
37 changes: 37 additions & 0 deletions macros/apply-tags/apply_column_tags_query.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{% macro apply_column_tags_query(resource) -%}
{{ return(adapter.dispatch('apply_column_tags_query', 'dbt_tags')(resource)) }}
{%- endmacro %}

{% macro default__apply_column_tags_query(resource) %}

{% set tag_ns = dbt_tags.get_resource_ns() %}
{% set tag_name_separator = var('dbt_tags__tag_name_separator','~') %}
{% set query %}

{% for key, value in resource.columns.items() -%}
{% if value.tags is defined -%}
{% for column_tag in value.tags if dbt_tags.is_allowed_tags(column_tag.split(tag_name_separator)[0]) %}

{%- set relation -%}
{{ resource.database }}.{{ resource.schema }}.{{ resource.alias or resource.name }}
{%- endset %}

alter table {{ relation }}
alter column {{ key }}
set tag {{ tag_ns }}.{{ column_tag.split(tag_name_separator)[0] }} =
{%- if tag_name_separator in column_tag -%}
'{{ column_tag.split(tag_name_separator)[1] }}';
{%- else -%}
'{{ key }}';
{%- endif %}
{{- log("dbt_tags.apply_column_tags_query - Set tag [" ~ tag_ns ~ "." ~ column_tag ~ "] on column [" ~ relation ~ ":" ~ key ~ "]", info=True) -}}

{%- endfor %}
{%- endif %}
{%- endfor %}

{% endset %}

{{ return(query) }}

{% endmacro %}
16 changes: 16 additions & 0 deletions macros/apply-tags/apply_column_tags_query.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
macros:
- name: apply_column_tags_query
description: |
This macro is designed to dynamically generate SQL statements that alter table columns by
setting tags for them, based on a resource's column definitions and tags.

**Usage**
```
{% raw %}
{{ adapter.dispatch('apply_column_tags_query', 'dbt_tags')(model) }}
{% endraw %}
```
arguments:
- name: resource
type: graph.node
description: graph.node object, This contains information about complete resource like resource's columns, including tags that are defined for each column etc.
18 changes: 18 additions & 0 deletions macros/apply-tags/apply_source_column_tags.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{% macro apply_source_column_tags() -%}
{{ return(adapter.dispatch('apply_source_column_tags', 'dbt_tags')()) }}
{%- endmacro %}

{% macro default__apply_source_column_tags() %}

{% if not execute or not var('dbt_tags__tag_source_columns',true)%}
{{ return("") }}
{% endif %}

{% set query %}
{%- for source in graph.sources.values() -%}
{{dbt_tags.apply_column_tags_query(source)}}
{%- endfor %}
{%- endset %}
{{return(query)}}

{% endmacro %}
36 changes: 36 additions & 0 deletions macros/apply-tags/apply_source_column_tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
macros:
- name: apply_source_column_tags
description: |
Used to apply tags to columns of source tables.

Looking for all tags at all columns in all sources, set the (allowed) dbt tags as the adapter tags to the configured column.
Use the macro in the on-run-end hook to apply tags to source columns.
Tags can be assigned to columns using a separator ("~" by default) to denote a tag value. If no value is provided, the column name is used as the default value.
Tag values are able to be set by adding in a separator ("~" by default) and then the value afterwards which will be set as a string. If you don't set a value, then the name of the column is set as the value.
Example, without a value:
- column: customer_first_name
- dbt tag name: pii_name
- dbt tag value set on column: "pii_name"
- Tag Key: pii_name
- Tag Value: "customer_first_name"

Example, with a value:
- column: membership_id
- dbt tag name: pii_mask_x_last_characters
- dbt tag value set on column: "pii_mask_x_last_characters~4"
- Tag Key: pii_mask_x_last_characters
- Tag Value: "4"
Note: To unapply the tags, let's simply re-create the table/view!

"~" (tilda) is defined as the default separator. If you want to change it to another character if you use this in your tag names, set the var "dbt_tags__tag_name_separator" in "dbt_project.yml" to another character.

**Usage** in `dbt_project.yml` file:
```
{% raw %}
on-run-end:
- >
{% if flags.WHICH in ('run', 'build') %}
{{ dbt_tags.apply_source_column_tags() }}
{% endif %}
{% endraw %}
```
4 changes: 2 additions & 2 deletions macros/utils/get_dbt_column_tags.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@

{% for column_tag in value.tags if dbt_tags.is_allowed_tags(column_tag.split(tag_name_separator)[0]) %}
{% if with_value %}
{% set found_tag = {"level": relation.resource_type ~ "." ~ relation.name ~ ".column", "name": value.name, "tag": column_tag} %}
{% set found_tag = {"level": relation.resource_type ~ "." ~ relation.name ~ ".column", "name": value.name, "tag": column_tag, "model_fqn": relation.relation_name} %}
{% else %}
{% set found_tag = {"level": relation.resource_type ~ "." ~ relation.name ~ ".column", "name": value.name, "tag": column_tag.split(tag_name_separator)[0]} %}
{% set found_tag = {"level": relation.resource_type ~ "." ~ relation.name ~ ".column", "name": value.name, "tag": column_tag.split(tag_name_separator)[0], "model_fqn": relation.relation_name} %}
{# {{ log(found_tag, info=True) if debug}} #}
{% endif %}
{% do found_tags.append(found_tag) %}
Expand Down
4 changes: 2 additions & 2 deletions macros/utils/get_dbt_relation_tags.sql
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
{% if relation.tags is defined %}
{% for relation_tag in relation.tags if dbt_tags.is_allowed_tags(relation_tag.split(tag_name_separator)[0]) %}
{% if with_value %}
{% set found_tag = {"level": relation.resource_type, "name": relation.name, "tag": relation_tag} %}
{% set found_tag = {"level": relation.resource_type, "name": relation.name, "tag": relation_tag, "model_fqn": relation.relation_name} %}
{% else %}
{% set found_tag = {"level": relation.resource_type, "name": relation.name, "tag": relation_tag.split(tag_name_separator)[0]} %}
{% set found_tag = {"level": relation.resource_type, "name": relation.name, "tag": relation_tag.split(tag_name_separator)[0], "model_fqn": relation.relation_name} %}
{# {{ log(found_tag, info=True) if debug}} #}
{% endif %}
{% do found_tags.append(found_tag) %}
Expand Down
7 changes: 4 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ dbt-tags-tagging = [
dbt-tags-test-tagging = [
{cmd = "dbt deps --project-dir integration_tests"},
{cmd = "poe dbt-tags-tagging"},
{cmd = "dbt build --exclude tag:cleanup --project-dir integration_tests"},
{cmd = "dbt build -s resource_type:seed --project-dir integration_tests"},
{cmd = "dbt build --exclude tag:cleanup resource_type:seed --project-dir integration_tests"},
]
dbt-tags-test-unapply-mps = [
{cmd = "poe dbt-tags-tagging"},
Expand All @@ -102,8 +103,8 @@ dbt-tags-test-unapply-mps = [
dbt-tags-test-drop-tags = [
{cmd = "poe dbt-tags-tagging"},
{cmd = "dbt run-operation drop_tags --project-dir integration_tests"},
{cmd = "dbt run -s verify_if_tags_created_correctly --project-dir integration_tests"},
{cmd = "dbt test -s verify_if_drop_tags_correctly --project-dir integration_tests"},
{cmd = "dbt run -s verify_if_tags_created_correctly --project-dir integration_tests --vars {'dbt_tags__tag_source_columns':false}"},
{cmd = "dbt test -s verify_if_drop_tags_correctly --project-dir integration_tests --vars {'dbt_tags__tag_source_columns':false}"},
]
dbt-tags-test = [
{cmd = "poe dbt-tags-test-tagging"},
Expand Down
Loading