diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index 64ee008..e776044 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -14,8 +14,14 @@ on-run-start: {% if var("fresh", false) %} create or replace schema {{ dbt_tags.get_resource_ns() }}; {% endif %} +on-run-end: + - > + {% if flags.WHICH in ('run', 'build') %} + {{ dbt_tags.apply_source_column_tags() }} + {% endif %} vars: + dbt_tags__tag_source_columns: true dbt_tags__database: dbt_tags dbt_tags__schema: common dbt_tags__resource_types: ["model", "snapshot", "source"] @@ -34,3 +40,7 @@ models: {% if flags.WHICH in ('run', 'build') %} {{ dbt_tags.apply_column_tags() }} {% endif %} + +seeds: + dbt_tags_test: + +schema: raw diff --git a/integration_tests/models/behavior/verify_if_tags_applied_correctly.sql b/integration_tests/models/behavior/verify_if_tags_applied_correctly.sql index c7b02ed..0040822 100644 --- a/integration_tests/models/behavior/verify_if_tags_applied_correctly.sql +++ b/integration_tests/models/behavior/verify_if_tags_applied_correctly.sql @@ -20,10 +20,6 @@ depends on: with dbt_project_tags as ( {% for item in dbt_project_tags if ".column" in item["level"] %} - {% set model_fqn -%} - {{ target.database }}.{{ target.schema }}.{{ item.level.split(".")[1] }} - {%- endset %} - {% set tag_name = item.tag.split(tag_name_separator)[0] %} {% if not item.tag.split(tag_name_separator)[1] %} {% set tag_value = item.name %} @@ -34,7 +30,7 @@ with dbt_project_tags as ( select lower('{{ tag_name }}') as tag_name, lower('{{ tag_value }}') as tag_value, - lower('{{ model_fqn }}') as model_name, + lower('{{ item.model_fqn }}') as model_name, lower('{{ item.name }}') as column_name, {% if not loop.last %} @@ -48,10 +44,6 @@ column_tag_references as ( {% for item in dbt_project_tags if ".column" in item.level %} - {% set model_fqn -%} - {{ target.database }}.{{ target.schema }}.{{ item.level.split(".")[1] }} - {%- endset %} - select lower(tag_name) as tag_name, lower(tag_value) as tag_value, lower(object_database || '.' || object_schema || '.' || object_name) as model_name, @@ -59,7 +51,7 @@ column_tag_references as ( from table( information_schema.tag_references( - '{{ model_fqn }}.{{ item.name }}', 'COLUMN' + '{{ item.model_fqn }}.{{ item.name }}', 'COLUMN' ) ) diff --git a/integration_tests/models/sources.yml b/integration_tests/models/sources.yml new file mode 100644 index 0000000..72e6e26 --- /dev/null +++ b/integration_tests/models/sources.yml @@ -0,0 +1,17 @@ +version: 2 + +sources: + - name: jaffle_shop + database: dbt_tags + schema: raw + tables: + - name: raw_orders + columns: + - name: user_id + tags: + - 'pii_null~user id' + - name: raw_customers + columns: + - name: first_name + tags: + - pii_name diff --git a/macros/apply-tags/apply_column_tags.sql b/macros/apply-tags/apply_column_tags.sql index 6739eac..ec38ae6 100644 --- a/macros/apply-tags/apply_column_tags.sql +++ b/macros/apply-tags/apply_column_tags.sql @@ -11,29 +11,7 @@ {% set tag_ns = dbt_tags.get_resource_ns() %} {% set tag_name_separator = var('dbt_tags__tag_name_separator','~') %} {% set query %} - - {% for key, value in model.columns.items() -%} - {% if value.tags is defined -%} - {% for column_tag in value.tags if dbt_tags.is_allowed_tags(column_tag.split(tag_name_separator)[0]) %} - - {%- set relation -%} - {{ model.database }}.{{ model.schema }}.{{ model.alias or model.name }} - {%- endset %} - - alter table {{ relation }} - alter column {{ key }} - set tag {{ tag_ns }}.{{ column_tag.split(tag_name_separator)[0] }} = - {%- if tag_name_separator in column_tag -%} - '{{ column_tag.split(tag_name_separator)[1] }}'; - {%- else -%} - '{{ key }}'; - {%- endif %} - {{- log("dbt_tags.apply_column_tags - Set tag [" ~ tag_ns ~ "." ~ column_tag ~ "] on column [" ~ relation ~ ":" ~ key ~ "]", info=True) -}} - - {%- endfor %} - {%- endif %} - {%- endfor %} - + {{dbt_tags.apply_column_tags_query(model)}} {% endset %} {{ return(query) }} diff --git a/macros/apply-tags/apply_column_tags_query.sql b/macros/apply-tags/apply_column_tags_query.sql new file mode 100644 index 0000000..714a479 --- /dev/null +++ b/macros/apply-tags/apply_column_tags_query.sql @@ -0,0 +1,37 @@ +{% macro apply_column_tags_query(resource) -%} + {{ return(adapter.dispatch('apply_column_tags_query', 'dbt_tags')(resource)) }} +{%- endmacro %} + +{% macro default__apply_column_tags_query(resource) %} + + {% set tag_ns = dbt_tags.get_resource_ns() %} + {% set tag_name_separator = var('dbt_tags__tag_name_separator','~') %} + {% set query %} + + {% for key, value in resource.columns.items() -%} + {% if value.tags is defined -%} + {% for column_tag in value.tags if dbt_tags.is_allowed_tags(column_tag.split(tag_name_separator)[0]) %} + + {%- set relation -%} + {{ resource.database }}.{{ resource.schema }}.{{ resource.alias or resource.name }} + {%- endset %} + + alter table {{ relation }} + alter column {{ key }} + set tag {{ tag_ns }}.{{ column_tag.split(tag_name_separator)[0] }} = + {%- if tag_name_separator in column_tag -%} + '{{ column_tag.split(tag_name_separator)[1] }}'; + {%- else -%} + '{{ key }}'; + {%- endif %} + {{- log("dbt_tags.apply_column_tags_query - Set tag [" ~ tag_ns ~ "." ~ column_tag ~ "] on column [" ~ relation ~ ":" ~ key ~ "]", info=True) -}} + + {%- endfor %} + {%- endif %} + {%- endfor %} + + {% endset %} + + {{ return(query) }} + +{% endmacro %} diff --git a/macros/apply-tags/apply_column_tags_query.yml b/macros/apply-tags/apply_column_tags_query.yml new file mode 100644 index 0000000..c4f7907 --- /dev/null +++ b/macros/apply-tags/apply_column_tags_query.yml @@ -0,0 +1,16 @@ +macros: + - name: apply_column_tags_query + description: | + This macro is designed to dynamically generate SQL statements that alter table columns by + setting tags for them, based on a resource's column definitions and tags. + + **Usage** + ``` + {% raw %} + {{ adapter.dispatch('apply_column_tags_query', 'dbt_tags')(model) }} + {% endraw %} + ``` + arguments: + - name: resource + type: graph.node + description: graph.node object, This contains information about complete resource like resource's columns, including tags that are defined for each column etc. diff --git a/macros/apply-tags/apply_source_column_tags.sql b/macros/apply-tags/apply_source_column_tags.sql new file mode 100644 index 0000000..c2e744c --- /dev/null +++ b/macros/apply-tags/apply_source_column_tags.sql @@ -0,0 +1,18 @@ +{% macro apply_source_column_tags() -%} + {{ return(adapter.dispatch('apply_source_column_tags', 'dbt_tags')()) }} +{%- endmacro %} + +{% macro default__apply_source_column_tags() %} + + {% if not execute or not var('dbt_tags__tag_source_columns',true)%} + {{ return("") }} + {% endif %} + + {% set query %} + {%- for source in graph.sources.values() -%} + {{dbt_tags.apply_column_tags_query(source)}} + {%- endfor %} + {%- endset %} + {{return(query)}} + +{% endmacro %} diff --git a/macros/apply-tags/apply_source_column_tags.yml b/macros/apply-tags/apply_source_column_tags.yml new file mode 100644 index 0000000..53d2763 --- /dev/null +++ b/macros/apply-tags/apply_source_column_tags.yml @@ -0,0 +1,36 @@ +macros: + - name: apply_source_column_tags + description: | + Used to apply tags to columns of source tables. + + Looking for all tags at all columns in all sources, set the (allowed) dbt tags as the adapter tags to the configured column. + Use the macro in the on-run-end hook to apply tags to source columns. + Tags can be assigned to columns using a separator ("~" by default) to denote a tag value. If no value is provided, the column name is used as the default value. + Tag values are able to be set by adding in a separator ("~" by default) and then the value afterwards which will be set as a string. If you don't set a value, then the name of the column is set as the value. + Example, without a value: + - column: customer_first_name + - dbt tag name: pii_name + - dbt tag value set on column: "pii_name" + - Tag Key: pii_name + - Tag Value: "customer_first_name" + + Example, with a value: + - column: membership_id + - dbt tag name: pii_mask_x_last_characters + - dbt tag value set on column: "pii_mask_x_last_characters~4" + - Tag Key: pii_mask_x_last_characters + - Tag Value: "4" + Note: To unapply the tags, let's simply re-create the table/view! + + "~" (tilda) is defined as the default separator. If you want to change it to another character if you use this in your tag names, set the var "dbt_tags__tag_name_separator" in "dbt_project.yml" to another character. + + **Usage** in `dbt_project.yml` file: + ``` + {% raw %} + on-run-end: + - > + {% if flags.WHICH in ('run', 'build') %} + {{ dbt_tags.apply_source_column_tags() }} + {% endif %} + {% endraw %} + ``` diff --git a/macros/utils/get_dbt_column_tags.sql b/macros/utils/get_dbt_column_tags.sql index 9752a53..f3bba7c 100644 --- a/macros/utils/get_dbt_column_tags.sql +++ b/macros/utils/get_dbt_column_tags.sql @@ -13,9 +13,9 @@ {% for column_tag in value.tags if dbt_tags.is_allowed_tags(column_tag.split(tag_name_separator)[0]) %} {% if with_value %} - {% set found_tag = {"level": relation.resource_type ~ "." ~ relation.name ~ ".column", "name": value.name, "tag": column_tag} %} + {% set found_tag = {"level": relation.resource_type ~ "." ~ relation.name ~ ".column", "name": value.name, "tag": column_tag, "model_fqn": relation.relation_name} %} {% else %} - {% set found_tag = {"level": relation.resource_type ~ "." ~ relation.name ~ ".column", "name": value.name, "tag": column_tag.split(tag_name_separator)[0]} %} + {% set found_tag = {"level": relation.resource_type ~ "." ~ relation.name ~ ".column", "name": value.name, "tag": column_tag.split(tag_name_separator)[0], "model_fqn": relation.relation_name} %} {# {{ log(found_tag, info=True) if debug}} #} {% endif %} {% do found_tags.append(found_tag) %} diff --git a/macros/utils/get_dbt_relation_tags.sql b/macros/utils/get_dbt_relation_tags.sql index 3f61685..470f4f3 100644 --- a/macros/utils/get_dbt_relation_tags.sql +++ b/macros/utils/get_dbt_relation_tags.sql @@ -10,9 +10,9 @@ {% if relation.tags is defined %} {% for relation_tag in relation.tags if dbt_tags.is_allowed_tags(relation_tag.split(tag_name_separator)[0]) %} {% if with_value %} - {% set found_tag = {"level": relation.resource_type, "name": relation.name, "tag": relation_tag} %} + {% set found_tag = {"level": relation.resource_type, "name": relation.name, "tag": relation_tag, "model_fqn": relation.relation_name} %} {% else %} - {% set found_tag = {"level": relation.resource_type, "name": relation.name, "tag": relation_tag.split(tag_name_separator)[0]} %} + {% set found_tag = {"level": relation.resource_type, "name": relation.name, "tag": relation_tag.split(tag_name_separator)[0], "model_fqn": relation.relation_name} %} {# {{ log(found_tag, info=True) if debug}} #} {% endif %} {% do found_tags.append(found_tag) %} diff --git a/pyproject.toml b/pyproject.toml index 67b225f..806e2d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,7 +91,8 @@ dbt-tags-tagging = [ dbt-tags-test-tagging = [ {cmd = "dbt deps --project-dir integration_tests"}, {cmd = "poe dbt-tags-tagging"}, - {cmd = "dbt build --exclude tag:cleanup --project-dir integration_tests"}, + {cmd = "dbt build -s resource_type:seed --project-dir integration_tests"}, + {cmd = "dbt build --exclude tag:cleanup resource_type:seed --project-dir integration_tests"}, ] dbt-tags-test-unapply-mps = [ {cmd = "poe dbt-tags-tagging"}, @@ -102,8 +103,8 @@ dbt-tags-test-unapply-mps = [ dbt-tags-test-drop-tags = [ {cmd = "poe dbt-tags-tagging"}, {cmd = "dbt run-operation drop_tags --project-dir integration_tests"}, - {cmd = "dbt run -s verify_if_tags_created_correctly --project-dir integration_tests"}, - {cmd = "dbt test -s verify_if_drop_tags_correctly --project-dir integration_tests"}, + {cmd = "dbt run -s verify_if_tags_created_correctly --project-dir integration_tests --vars {'dbt_tags__tag_source_columns':false}"}, + {cmd = "dbt test -s verify_if_drop_tags_correctly --project-dir integration_tests --vars {'dbt_tags__tag_source_columns':false}"}, ] dbt-tags-test = [ {cmd = "poe dbt-tags-test-tagging"},