Fix persist_docs for columns (#180)
* Fix persist_docs for columns

* Disable parquet model on endpoint

* Rm parquet model, not worth the fuss

* Update changelog [skip ci]
jtcohen6 authored Jun 15, 2021
1 parent 2ab5523 commit a8a85c5
Showing 13 changed files with 214 additions and 20 deletions.
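For context, column-level `persist_docs` is the feature this commit fixes: when enabled, dbt copies the descriptions from a project's YAML into the warehouse as table and column comments. A minimal Delta model exercising that path might look like the sketch below; the model name mirrors the test fixture added in this commit, and setting `persist_docs` inline in the config block is an alternative to the `dbt_project.yml` config the new test uses.

```sql
-- models/table_delta_model.sql (illustrative sketch)
{{ config(
    materialized='table',
    file_format='delta',
    persist_docs={"relation": true, "columns": true}
) }}

select 1 as id, 'Joe' as name
```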
13 changes: 9 additions & 4 deletions CHANGELOG.md
@@ -1,11 +1,16 @@
## dbt next
## dbt-spark 0.20.0 (Release TBD)

### Fixes

- Fix column-level `persist_docs` on Delta tables, add tests ([#180](https://github.com/fishtown-analytics/dbt-spark/pull/180))

## dbt-spark 0.20.0rc1 (June 8, 2021)

### Features

- Allow user to specify `use_ssl` ([#169](https://github.com/fishtown-analytics/dbt-spark/pull/169))
- Allow setting table `OPTIONS` using `config` ([#171](https://github.com/fishtown-analytics/dbt-spark/pull/171))
- Add support for column comment ([#170](https://github.com/fishtown-analytics/dbt-spark/pull/170))

- Add support for column-level `persist_docs` on Delta tables ([#84](https://github.com/fishtown-analytics/dbt-spark/pull/84), [#170](https://github.com/fishtown-analytics/dbt-spark/pull/170))

### Fixes
- Cast `table_owner` to string to avoid errors generating docs ([#158](https://github.com/fishtown-analytics/dbt-spark/pull/158), [#159](https://github.com/fishtown-analytics/dbt-spark/pull/159))
@@ -24,7 +29,7 @@
- [@cristianoperez](https://github.com/cristianoperez) ([#170](https://github.com/fishtown-analytics/dbt-spark/pull/170))


## dbt-spark 0.19.1 (Release TBD)
## dbt-spark 0.19.1 (April 2, 2021)

## dbt-spark 0.19.1b2 (February 26, 2021)

12 changes: 10 additions & 2 deletions dbt/include/spark/macros/adapters.sql
@@ -174,15 +174,23 @@
{% do return(None) %}
{%- endmacro %}

{% macro spark__persist_docs(relation, model, for_relation, for_columns) -%}
{% if for_columns and config.persist_column_docs() and model.columns %}
{% do alter_column_comment(relation, model.columns) %}
{% endif %}
{% endmacro %}

{% macro spark__alter_column_comment(relation, column_dict) %}
{% if config.get('file_format', validator=validation.any[basestring]) == 'delta' %}
{% for column_name in column_dict %}
{% set comment = column_dict[column_name]['description'] %}
{% set escaped_comment = comment | replace('\'', '\\\'') %}
{% set comment_query %}
alter table {{ relation }} change column {{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }} comment '{{ comment }}';
alter table {{ relation }} change column
{{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }}
comment '{{ escaped_comment }}';
{% endset %}
{% do run_query(comment_query) %}
{% endfor %}
{% endif %}
{% endmacro %}
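The `escaped_comment` step above backslash-escapes embedded single quotes, so descriptions containing quotes no longer break the generated DDL. For a documented `id` column on a Delta relation, the macro should now emit something along these lines (relation name and comment text are illustrative, not captured output):

```sql
alter table analytics.table_delta_model change column id
  comment 'id Column description "with double quotes" and with \'single quotes\'';
```

Columns declared with `quote: true` in the YAML go through `adapter.quote` instead of the bare name, which on Spark means a backtick-quoted identifier.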

12 changes: 4 additions & 8 deletions dbt/include/spark/macros/materializations/seed.sql
@@ -81,10 +81,7 @@
{%- set agate_table = load_agate_table() -%}
{%- do store_result('agate_table', response='OK', agate_table=agate_table) -%}

{{ run_hooks(pre_hooks, inside_transaction=False) }}

-- `BEGIN` happens here:
{{ run_hooks(pre_hooks, inside_transaction=True) }}
{{ run_hooks(pre_hooks) }}

-- build model
{% set create_table_sql = reset_csv_table(model, full_refresh_mode, old_relation, agate_table) %}
@@ -98,10 +95,9 @@
{{ sql }}
{% endcall %}

{{ run_hooks(post_hooks, inside_transaction=True) }}
-- `COMMIT` happens here
{{ adapter.commit() }}
{{ run_hooks(post_hooks, inside_transaction=False) }}
{% do persist_docs(target_relation, model) %}

{{ run_hooks(post_hooks) }}

{{ return({'relations': [target_relation]}) }}

2 changes: 2 additions & 0 deletions dbt/include/spark/macros/materializations/table.sql
@@ -21,6 +21,8 @@
{% call statement('main') -%}
{{ create_table_as(False, target_relation, sql) }}
{%- endcall %}

{% do persist_docs(target_relation, model) %}

{{ run_hooks(post_hooks) }}

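With this call in place, a `dbt run` of a documented Delta table model persists column comments as a follow-up step right after the main statement, roughly in this order (a sketch only; the exact DDL comes from `create_table_as` and is not shown in this diff):

```sql
-- 1. main statement for the model
create or replace table analytics.table_delta_model using delta as
select 1 as id, 'Joe' as name;

-- 2. persist_docs: one statement per documented column (Delta only)
alter table analytics.table_delta_model change column id comment '...';
alter table analytics.table_delta_model change column name comment '...';
```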
12 changes: 6 additions & 6 deletions test/custom/base.py
@@ -76,7 +76,7 @@ def apache_spark_profile(self):
},
'test': {
'outputs': {
'default2': {
'thrift': {
'type': 'spark',
'host': 'localhost',
'user': 'dbt',
@@ -87,7 +87,7 @@
'schema': self.unique_schema()
},
},
'target': 'default2'
'target': 'thrift'
}
}

@@ -98,7 +98,7 @@ def databricks_cluster_profile(self):
},
'test': {
'outputs': {
'odbc': {
'cluster': {
'type': 'spark',
'method': 'odbc',
'host': os.getenv('DBT_DATABRICKS_HOST_NAME'),
@@ -109,7 +109,7 @@
'schema': self.unique_schema()
},
},
'target': 'odbc'
'target': 'cluster'
}
}

@@ -120,7 +120,7 @@ def databricks_sql_endpoint_profile(self):
},
'test': {
'outputs': {
'default2': {
'endpoint': {
'type': 'spark',
'method': 'odbc',
'host': os.getenv('DBT_DATABRICKS_HOST_NAME'),
@@ -131,7 +131,7 @@
'schema': self.unique_schema()
},
},
'target': 'default2'
'target': 'endpoint'
}
}

3 changes: 3 additions & 0 deletions test/custom/persist_docs/data/seed.csv
@@ -0,0 +1,3 @@
id,name
1,Alice
2,Bob
26 changes: 26 additions & 0 deletions test/custom/persist_docs/data/seeds.yml
@@ -0,0 +1,26 @@
version: 2

seeds:
  - name: seed
    description: |
      Seed model description "with double quotes"
      and with 'single quotes' as welll as other;
      '''abc123'''
      reserved -- characters
      --
      /* comment */
      Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
    columns:
      - name: id
        description: |
          id Column description "with double quotes"
          and with 'single quotes' as welll as other;
          '''abc123'''
          reserved -- characters
          --
          /* comment */
          Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
      - name: name
        description: |
          Some stuff here and then a call to
          {{ doc('my_fun_doc')}}
10 changes: 10 additions & 0 deletions test/custom/persist_docs/models/my_fun_docs.md
@@ -0,0 +1,10 @@
{% docs my_fun_doc %}
name Column description "with double quotes"
and with 'single quotes' as welll as other;
'''abc123'''
reserved -- characters
--
/* comment */
Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting

{% enddocs %}
1 change: 1 addition & 0 deletions test/custom/persist_docs/models/no_docs_model.sql
@@ -0,0 +1 @@
select 1 as id, 'Alice' as name
71 changes: 71 additions & 0 deletions test/custom/persist_docs/models/schema.yml
@@ -0,0 +1,71 @@
version: 2

models:

  - name: table_parquet_model
    description: |
      Table model description "with double quotes"
      and with 'single quotes' as welll as other;
      '''abc123'''
      reserved -- characters
      --
      /* comment */
      Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
    columns:
      - name: id
        description: |
          id Column description "with double quotes"
          and with 'single quotes' as welll as other;
          '''abc123'''
          reserved -- characters
          --
          /* comment */
          Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
      - name: name
        description: |
          Some stuff here and then a call to
          {{ doc('my_fun_doc')}}
  - name: table_delta_model
    description: |
      Table model description "with double quotes"
      and with 'single quotes' as welll as other;
      '''abc123'''
      reserved -- characters
      --
      /* comment */
      Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
    columns:
      - name: id
        description: |
          id Column description "with double quotes"
          and with 'single quotes' as welll as other;
          '''abc123'''
          reserved -- characters
          --
          /* comment */
          Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
      - name: name
        description: |
          Some stuff here and then a call to
          {{ doc('my_fun_doc')}}
  - name: view_model
    description: |
      View model description "with double quotes"
      and with 'single quotes' as welll as other;
      '''abc123'''
      reserved -- characters
      --
      /* comment */
      Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
    columns:
      - name: id
        description: |
          id Column description "with double quotes"
          and with 'single quotes' as welll as other;
          '''abc123'''
          reserved -- characters
          --
          /* comment */
          Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
2 changes: 2 additions & 0 deletions test/custom/persist_docs/models/table_delta_model.sql
@@ -0,0 +1,2 @@
{{ config(materialized='table', file_format='delta') }}
select 1 as id, 'Joe' as name
2 changes: 2 additions & 0 deletions test/custom/persist_docs/models/view_model.sql
@@ -0,0 +1,2 @@
{{ config(materialized='view') }}
select 2 as id, 'Bob' as name
68 changes: 68 additions & 0 deletions test/custom/persist_docs/test_persist_docs.py
@@ -0,0 +1,68 @@
from cProfile import run
from test.custom.base import DBTSparkIntegrationTest, use_profile
import dbt.exceptions

import json


class TestPersistDocsDelta(DBTSparkIntegrationTest):
    @property
    def schema(self):
        return "persist_docs_columns"

    @property
    def models(self):
        return "models"

    @property
    def project_config(self):
        return {
            'config-version': 2,
            'models': {
                'test': {
                    '+persist_docs': {
                        "relation": True,
                        "columns": True,
                    },
                }
            },
            'seeds': {
                'test': {
                    '+persist_docs': {
                        "relation": True,
                        "columns": True,
                    },
                    '+file_format': 'delta',
                    '+quote_columns': True
                }
            },
        }

    def test_delta_comments(self):
        self.run_dbt(['seed'])
        self.run_dbt(['run'])

        for table in ['table_delta_model', 'seed']:
            results = self.run_sql(
                'describe extended {schema}.{table}'.format(schema=self.unique_schema(), table=table),
                fetch='all'
            )

            for result in results:
                if result[0] == 'Comment':
                    whatis = 'Seed' if table == 'seed' else 'Table'
                    assert result[1].startswith(f'{whatis} model description')
                if result[0] == 'id':
                    assert result[2].startswith('id Column description')
                if result[0] == 'name':
                    assert result[2].startswith('Some stuff here and then a call to')

    # runs on Spark v3.0
    @use_profile("databricks_cluster")
    def test_delta_comments_databricks_cluster(self):
        self.test_delta_comments()

    # runs on Spark v3.0
    @use_profile("databricks_sql_endpoint")
    def test_delta_comments_databricks_sql_endpoint(self):
        self.test_delta_comments()

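The assertions above rely on the shape of `describe extended` output in Spark: the leading rows are `(col_name, data_type, comment)` triples for each column, and the table-level comment appears further down in the detailed section as a row whose first field is `Comment`. A sketch of the relevant rows for the Delta model, assuming the fixtures above (values abridged):

```sql
describe extended <schema>.table_delta_model;
-- id                 int        id Column description "with double quotes" ...
-- name               string     Some stuff here and then a call to ...
-- ...
-- # Detailed Table Information
-- Comment                       Table model description "with double quotes" ...
```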