diff --git a/CHANGELOG.md b/CHANGELOG.md
index 948a64a8a..339f7f5c4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,11 +1,16 @@
-## dbt next
+## dbt-spark 0.20.0 (Release TBD)
+
+### Fixes
+
+- Fix column-level `persist_docs` on Delta tables, add tests ([#180](https://github.com/fishtown-analytics/dbt-spark/pull/180))
+
+## dbt-spark 0.20.0rc1 (June 8, 2021)
 
 ### Features
 
 - Allow user to specify `use_ssl` ([#169](https://github.com/fishtown-analytics/dbt-spark/pull/169))
 - Allow setting table `OPTIONS` using `config` ([#171](https://github.com/fishtown-analytics/dbt-spark/pull/171))
-- Add support for column comment ([#170](https://github.com/fishtown-analytics/dbt-spark/pull/170))
-
+- Add support for column-level `persist_docs` on Delta tables ([#84](https://github.com/fishtown-analytics/dbt-spark/pull/84), [#170](https://github.com/fishtown-analytics/dbt-spark/pull/170))
 
 ### Fixes
 - Cast `table_owner` to string to avoid errors generating docs ([#158](https://github.com/fishtown-analytics/dbt-spark/pull/158), [#159](https://github.com/fishtown-analytics/dbt-spark/pull/159))
@@ -24,7 +29,7 @@
 
 - [@cristianoperez](https://github.com/cristianoperez) ([#170](https://github.com/fishtown-analytics/dbt-spark/pull/170))
 
-## dbt-spark 0.19.1 (Release TBD)
+## dbt-spark 0.19.1 (April 2, 2021)
 
 ## dbt-spark 0.19.1b2 (February 26, 2021)
 
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index ac1ec92b3..fcdc46c6d 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -174,15 +174,23 @@
   {% do return(None) %}
 {%- endmacro %}
 
+{% macro spark__persist_docs(relation, model, for_relation, for_columns) -%}
+  {% if for_columns and config.persist_column_docs() and model.columns %}
+    {% do alter_column_comment(relation, model.columns) %}
+  {% endif %}
+{% endmacro %}
+
 {% macro spark__alter_column_comment(relation, column_dict) %}
   {% if config.get('file_format', validator=validation.any[basestring]) == 'delta' %}
     {% for column_name in column_dict %}
       {% set comment = column_dict[column_name]['description'] %}
+      {% set escaped_comment = comment | replace('\'', '\\\'') %}
       {% set comment_query %}
-        alter table {{ relation }} change column {{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }} comment '{{ comment }}';
+        alter table {{ relation }} change column
+            {{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }}
+            comment '{{ escaped_comment }}';
       {% endset %}
       {% do run_query(comment_query) %}
     {% endfor %}
   {% endif %}
 {% endmacro %}
-
diff --git a/dbt/include/spark/macros/materializations/seed.sql b/dbt/include/spark/macros/materializations/seed.sql
index 795f49329..536e6447b 100644
--- a/dbt/include/spark/macros/materializations/seed.sql
+++ b/dbt/include/spark/macros/materializations/seed.sql
@@ -81,10 +81,7 @@
   {%- set agate_table = load_agate_table() -%}
   {%- do store_result('agate_table', response='OK', agate_table=agate_table) -%}
 
-  {{ run_hooks(pre_hooks, inside_transaction=False) }}
-
-  -- `BEGIN` happens here:
-  {{ run_hooks(pre_hooks, inside_transaction=True) }}
+  {{ run_hooks(pre_hooks) }}
 
   -- build model
   {% set create_table_sql = reset_csv_table(model, full_refresh_mode, old_relation, agate_table) %}
@@ -98,10 +95,9 @@
     {{ sql }}
   {% endcall %}
 
-  {{ run_hooks(post_hooks, inside_transaction=True) }}
-  -- `COMMIT` happens here
-  {{ adapter.commit() }}
-  {{ run_hooks(post_hooks, inside_transaction=False) }}
+  {% do persist_docs(target_relation, model) %}
+
+  {{ run_hooks(post_hooks) }}
 
   {{ return({'relations': [target_relation]}) }}
 
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index adfdb7a3c..3ae2df973 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -21,6 +21,8 @@
   {% call statement('main') -%}
     {{ create_table_as(False, target_relation, sql) }}
   {%- endcall %}
+
+  {% do persist_docs(target_relation, model) %}
 
   {{ run_hooks(post_hooks) }}
 
diff --git a/test/custom/base.py b/test/custom/base.py
index d2dc6dd78..28fcad3cc 100644
--- a/test/custom/base.py
+++ b/test/custom/base.py
@@ -76,7 +76,7 @@ def apache_spark_profile(self):
             },
             'test': {
                 'outputs': {
-                    'default2': {
+                    'thrift': {
                         'type': 'spark',
                         'host': 'localhost',
                         'user': 'dbt',
@@ -87,7 +87,7 @@ def apache_spark_profile(self):
                         'schema': self.unique_schema()
                     },
                 },
-                'target': 'default2'
+                'target': 'thrift'
             }
         }
 
@@ -98,7 +98,7 @@ def databricks_cluster_profile(self):
             },
             'test': {
                 'outputs': {
-                    'odbc': {
+                    'cluster': {
                         'type': 'spark',
                         'method': 'odbc',
                         'host': os.getenv('DBT_DATABRICKS_HOST_NAME'),
@@ -109,7 +109,7 @@ def databricks_cluster_profile(self):
                         'schema': self.unique_schema()
                     },
                 },
-                'target': 'odbc'
+                'target': 'cluster'
             }
         }
 
@@ -120,7 +120,7 @@ def databricks_sql_endpoint_profile(self):
             },
             'test': {
                 'outputs': {
-                    'default2': {
+                    'endpoint': {
                         'type': 'spark',
                         'method': 'odbc',
                         'host': os.getenv('DBT_DATABRICKS_HOST_NAME'),
@@ -131,7 +131,7 @@ def databricks_sql_endpoint_profile(self):
                         'schema': self.unique_schema()
                     },
                 },
-                'target': 'default2'
+                'target': 'endpoint'
             }
         }
 
diff --git a/test/custom/persist_docs/data/seed.csv b/test/custom/persist_docs/data/seed.csv
new file mode 100644
index 000000000..4a295177c
--- /dev/null
+++ b/test/custom/persist_docs/data/seed.csv
@@ -0,0 +1,3 @@
+id,name
+1,Alice
+2,Bob
\ No newline at end of file
diff --git a/test/custom/persist_docs/data/seeds.yml b/test/custom/persist_docs/data/seeds.yml
new file mode 100644
index 000000000..7ab82fa6b
--- /dev/null
+++ b/test/custom/persist_docs/data/seeds.yml
@@ -0,0 +1,26 @@
+version: 2
+
+seeds:
+  - name: seed
+    description: |
+      Seed model description "with double quotes"
+      and with 'single quotes' as welll as other;
+      '''abc123'''
+      reserved -- characters
+      --
+      /* comment */
+      Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
+    columns:
+      - name: id
+        description: |
+          id Column description "with double quotes"
+          and with 'single quotes' as welll as other;
+          '''abc123'''
+          reserved -- characters
+          --
+          /* comment */
+          Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
+      - name: name
+        description: |
+          Some stuff here and then a call to
+          {{ doc('my_fun_doc')}}
diff --git a/test/custom/persist_docs/models/my_fun_docs.md b/test/custom/persist_docs/models/my_fun_docs.md
new file mode 100644
index 000000000..f3c0fbf55
--- /dev/null
+++ b/test/custom/persist_docs/models/my_fun_docs.md
@@ -0,0 +1,10 @@
+{% docs my_fun_doc %}
+name Column description "with double quotes"
+and with 'single quotes' as welll as other;
+'''abc123'''
+reserved -- characters
+--
+/* comment */
+Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
+
+{% enddocs %}
diff --git a/test/custom/persist_docs/models/no_docs_model.sql b/test/custom/persist_docs/models/no_docs_model.sql
new file mode 100644
index 000000000..e39a7a156
--- /dev/null
+++ b/test/custom/persist_docs/models/no_docs_model.sql
@@ -0,0 +1 @@
+select 1 as id, 'Alice' as name
diff --git a/test/custom/persist_docs/models/schema.yml b/test/custom/persist_docs/models/schema.yml
new file mode 100644
index 000000000..78dcda799
--- /dev/null
+++ b/test/custom/persist_docs/models/schema.yml
@@ -0,0 +1,71 @@
+version: 2
+
+models:
+
+  - name: table_parquet_model
+    description: |
+      Table model description "with double quotes"
+      and with 'single quotes' as welll as other;
+      '''abc123'''
+      reserved -- characters
+      --
+      /* comment */
+      Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
+    columns:
+      - name: id
+        description: |
+          id Column description "with double quotes"
+          and with 'single quotes' as welll as other;
+          '''abc123'''
+          reserved -- characters
+          --
+          /* comment */
+          Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
+      - name: name
+        description: |
+          Some stuff here and then a call to
+          {{ doc('my_fun_doc')}}
+
+  - name: table_delta_model
+    description: |
+      Table model description "with double quotes"
+      and with 'single quotes' as welll as other;
+      '''abc123'''
+      reserved -- characters
+      --
+      /* comment */
+      Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
+    columns:
+      - name: id
+        description: |
+          id Column description "with double quotes"
+          and with 'single quotes' as welll as other;
+          '''abc123'''
+          reserved -- characters
+          --
+          /* comment */
+          Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
+      - name: name
+        description: |
+          Some stuff here and then a call to
+          {{ doc('my_fun_doc')}}
+
+  - name: view_model
+    description: |
+      View model description "with double quotes"
+      and with 'single quotes' as welll as other;
+      '''abc123'''
+      reserved -- characters
+      --
+      /* comment */
+      Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
+    columns:
+      - name: id
+        description: |
+          id Column description "with double quotes"
+          and with 'single quotes' as welll as other;
+          '''abc123'''
+          reserved -- characters
+          --
+          /* comment */
+          Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
diff --git a/test/custom/persist_docs/models/table_delta_model.sql b/test/custom/persist_docs/models/table_delta_model.sql
new file mode 100644
index 000000000..06e40e0c6
--- /dev/null
+++ b/test/custom/persist_docs/models/table_delta_model.sql
@@ -0,0 +1,2 @@
+{{ config(materialized='table', file_format='delta') }}
+select 1 as id, 'Joe' as name
diff --git a/test/custom/persist_docs/models/view_model.sql b/test/custom/persist_docs/models/view_model.sql
new file mode 100644
index 000000000..a6f96a16d
--- /dev/null
+++ b/test/custom/persist_docs/models/view_model.sql
@@ -0,0 +1,2 @@
+{{ config(materialized='view') }}
+select 2 as id, 'Bob' as name
diff --git a/test/custom/persist_docs/test_persist_docs.py b/test/custom/persist_docs/test_persist_docs.py
new file mode 100644
index 000000000..d9acf70d1
--- /dev/null
+++ b/test/custom/persist_docs/test_persist_docs.py
@@ -0,0 +1,68 @@
+from cProfile import run
+from test.custom.base import DBTSparkIntegrationTest, use_profile
+import dbt.exceptions
+
+import json
+
+
+class TestPersistDocsDelta(DBTSparkIntegrationTest):
+    @property
+    def schema(self):
+        return "persist_docs_columns"
+
+    @property
+    def models(self):
+        return "models"
+
+    @property
+    def project_config(self):
+        return {
+            'config-version': 2,
+            'models': {
+                'test': {
+                    '+persist_docs': {
+                        "relation": True,
+                        "columns": True,
+                    },
+                }
+            },
+            'seeds': {
+                'test': {
+                    '+persist_docs': {
+                        "relation": True,
+                        "columns": True,
+                    },
+                    '+file_format': 'delta',
+                    '+quote_columns': True
+                }
+            },
+        }
+
+    def test_delta_comments(self):
+        self.run_dbt(['seed'])
+        self.run_dbt(['run'])
+
+        for table in ['table_delta_model', 'seed']:
+            results = self.run_sql(
+                'describe extended {schema}.{table}'.format(schema=self.unique_schema(), table=table),
+                fetch='all'
+            )
+
+            for result in results:
+                if result[0] == 'Comment':
+                    whatis = 'Seed' if table == 'seed' else 'Table'
+                    assert result[1].startswith(f'{whatis} model description')
+                if result[0] == 'id':
+                    assert result[2].startswith('id Column description')
+                if result[0] == 'name':
+                    assert result[2].startswith('Some stuff here and then a call to')
+
+    # runs on Spark v3.0
+    @use_profile("databricks_cluster")
+    def test_delta_comments_databricks_cluster(self):
+        self.test_delta_comments()
+
+    # runs on Spark v3.0
+    @use_profile("databricks_sql_endpoint")
+    def test_delta_comments_databricks_sql_endpoint(self):
+        self.test_delta_comments()