Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add functionality to persist descriptions to nested columns in bigquery #2550

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,12 @@
- `dbt deps` now respects the `--project-dir` flag, so using `dbt deps --project-dir=/some/path` and then `dbt run --project-dir=/some/path` will properly find dependencies ([#2519](https://github.com/fishtown-analytics/dbt/issues/2519), [#2534](https://github.com/fishtown-analytics/dbt/pull/2534))
- `packages.yml` revision/version fields can be float-like again (`revision: '1.0'` is valid). ([#2518](https://github.com/fishtown-analytics/dbt/issues/2518), [#2535](https://github.com/fishtown-analytics/dbt/pull/2535))
- Parallel RPC requests no longer step on each others' arguments ([#2484](https://github.com/fishtown-analytics/dbt/issues/2484), [#2554](https://github.com/fishtown-analytics/dbt/pull/2554))
- `persist_docs` now takes into account descriptions for nested columns in bigquery ([#2549](https://github.com/fishtown-analytics/dbt/issues/2549), [#2550](https://github.com/fishtown-analytics/dbt/pull/2550))
- On windows (depending upon OS support), dbt no longer fails with errors when writing artifacts ([#2558](https://github.com/fishtown-analytics/dbt/issues/2558), [#2566](https://github.com/fishtown-analytics/dbt/pull/2566))

Contributors:
- [@bodschut](https://github.com/bodschut) ([#2550](https://github.com/fishtown-analytics/dbt/pull/2550))

## dbt 0.17.0 (June 08, 2020)

### Fixes
Expand Down
44 changes: 37 additions & 7 deletions plugins/bigquery/dbt/adapters/bigquery/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,36 @@ def get_table_ref_from_relation(self, conn, relation):
relation.identifier,
conn)

def _update_column_dict(self, bq_column_dict, dbt_columns, parent=''):
"""
Helper function to recursively traverse the schema of a table in the
update_column_descriptions function below.

bq_column_dict should be a dict as obtained by the to_api_repr()
function of a SchemaField object.
"""
if parent:
dotted_column_name = '{}.{}'.format(parent, bq_column_dict['name'])
else:
dotted_column_name = bq_column_dict['name']

if dotted_column_name in dbt_columns:
column_config = dbt_columns[dotted_column_name]
bq_column_dict['description'] = column_config.get('description')

new_fields = []
for child_col_dict in bq_column_dict.get('fields', list()):
new_child_column_dict = self._update_column_dict(
child_col_dict,
dbt_columns,
parent=dotted_column_name
)
new_fields.append(new_child_column_dict)

bq_column_dict['fields'] = new_fields

return bq_column_dict

@available.parse_none
def update_column_descriptions(self, relation, columns):
if len(columns) == 0:
Expand All @@ -574,13 +604,13 @@ def update_column_descriptions(self, relation, columns):
table = conn.handle.get_table(table_ref)

new_schema = []
for column in table.schema:
if column.name in columns:
column_config = columns[column.name]
column_dict = column.to_api_repr()
column_dict['description'] = column_config.get('description')
column = SchemaField.from_api_repr(column_dict)
new_schema.append(column)
for bq_column in table.schema:
bq_column_dict = bq_column.to_api_repr()
new_bq_column_dict = self._update_column_dict(
bq_column_dict,
columns
)
new_schema.append(SchemaField.from_api_repr(new_bq_column_dict))

new_table = google.cloud.bigquery.Table(table_ref, schema=new_schema)
conn.handle.update_table(new_table, ['schema'])
Expand Down
5 changes: 2 additions & 3 deletions plugins/bigquery/dbt/include/bigquery/macros/catalog.sql
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@
-- use the "real" column name from the paths query below
column_name as base_column_name,
ordinal_position as column_index,
cast(null as string) as column_comment,

is_partitioning_column,
clustering_ordinal_position
Expand All @@ -99,10 +98,10 @@
concat(table_catalog, '.', table_schema, '.', table_name) as relation_id,
field_path as column_name,
data_type as column_type,
column_name as base_column_name
column_name as base_column_name,
description as column_comment

from {{ information_schema.replace(information_schema_view='COLUMN_FIELD_PATHS') }}
where data_type not like 'STRUCT%'

),

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ select
3 as field_3,

struct(
4 as field_4,
5 as field_5
5 as field_5,
6 as field_6
) as nested_field
12 changes: 9 additions & 3 deletions test/integration/029_docs_generate_tests/test_docs_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -619,17 +619,23 @@ def expected_bigquery_complex_catalog(self):
'type': 'INT64',
'comment': None
},
'nested_field.field_4': {
'name': 'nested_field.field_4',
'nested_field': {
'name': 'nested_field',
'index': 4,
'type': 'INT64',
'type': 'STRUCT<field_5 INT64, field_6 INT64>',
'comment': None
},
'nested_field.field_5': {
'name': 'nested_field.field_5',
'index': 5,
'type': 'INT64',
'comment': None
},
'nested_field.field_6': {
'name': 'nested_field.field_6',
'index': 6,
'type': 'INT64',
'comment': None
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# dbt schema file for the nested persist_docs integration-test models.
# Dotted column names (e.g. `level_1.level_2`) address nested STRUCT
# fields; each description here should be persisted to BigQuery when
# persist_docs is enabled for columns.
version: 2

models:
  - name: table_model_nested
    columns:
      - name: level_1
        description: level_1 column description
      - name: level_1.level_2
        description: level_2 column description
      - name: level_1.level_2.level_3_a
        description: level_3 column description
  - name: view_model_nested
    columns:
      - name: level_1
        description: level_1 column description
      - name: level_1.level_2
        description: level_2 column description
      - name: level_1.level_2.level_3_a
        description: level_3 column description
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Test fixture: a table with a doubly-nested STRUCT column so that
-- persist_docs can be verified on nested field descriptions (the
-- descriptions live in this directory's schema.yml).
{{ config(materialized='table') }}
SELECT
    STRUCT(
        STRUCT(
            1 AS level_3_a,
            2 AS level_3_b
        ) AS level_2
    ) AS level_1
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Test fixture: a view with a doubly-nested STRUCT column so that
-- persist_docs can be verified on nested field descriptions (the
-- descriptions live in this directory's schema.yml).
{{ config(materialized='view') }}
SELECT
    STRUCT(
        STRUCT(
            1 AS level_3_a,
            2 AS level_3_b
        ) AS level_2
    ) AS level_1
79 changes: 77 additions & 2 deletions test/integration/060_persist_docs_tests/test_persist_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,15 @@ def _assert_has_table_comments(self, table_node):
assert table_id_comment.startswith('id Column description')

table_name_comment = table_node['columns']['name']['comment']
assert table_name_comment.startswith('Some stuff here and then a call to')
assert table_name_comment.startswith(
'Some stuff here and then a call to')

self._assert_common_comments(
table_comment, table_id_comment, table_name_comment
)

def _assert_has_view_comments(self, view_node, has_node_comments=True, has_column_comments=True):
def _assert_has_view_comments(self, view_node, has_node_comments=True,
has_column_comments=True):
view_comment = view_node['metadata']['comment']
if has_node_comments:
assert view_comment.startswith('View model description')
Expand Down Expand Up @@ -156,3 +158,76 @@ def test_snowflake_persist_docs(self):
@use_profile('bigquery')
def test_bigquery_persist_docs(self):
    # Smoke test: a plain `dbt run` must succeed on BigQuery —
    # presumably with persist_docs enabled via the project config
    # defined elsewhere in this class/file (not visible here; confirm).
    self.run_dbt()


class TestPersistDocsNested(BasePersistDocsTest):
    """Verify that persist_docs writes descriptions for nested (STRUCT)
    columns on BigQuery, both on the relation itself and in the catalog.
    """

    @property
    def project_config(self):
        # Enable persist_docs for both relation and column comments.
        return {
            'config-version': 2,
            'models': {
                'test': {
                    '+persist_docs': {
                        "relation": True,
                        "columns": True,
                    },
                }
            }
        }

    @property
    def models(self):
        return 'models-bigquery-nested'

    @use_profile('bigquery')
    def test_bigquery_persist_docs(self):
        """
        Run dbt, then use the BigQuery client from the adapter to check
        that the column descriptions were persisted on the nested test
        models (table and view).

        Next, generate the catalog and check that the comments show up
        there as well.
        """
        self.run_dbt()

        self.run_dbt(['docs', 'generate'])
        with open('target/catalog.json') as f:
            catalog = json.load(f)
        assert 'nodes' in catalog
        # exactly one table model and one view model
        assert len(catalog['nodes']) == 2

        expected_comments = {
            'level_1': 'level_1 column description',
            'level_1.level_2': 'level_2 column description',
            'level_1.level_2.level_3_a': 'level_3 column description',
        }

        for model_name in ['table_model_nested', 'view_model_nested']:
            # 1) inspect the live relation's schema through the API
            with self.adapter.connection_named('_test'):
                handle = self.adapter.connections \
                    .get_thread_connection().handle

                table_id = '{}.{}.{}'.format(
                    self.default_database,
                    self.unique_schema(),
                    model_name,
                )
                # Walk down the first field of each nesting level and
                # check its persisted description.
                field = handle.get_table(table_id).schema[0]
                for level in (1, 2, 3):
                    assert field.description == \
                        'level_{} column description'.format(level)
                    if field.fields:
                        field = field.fields[0]

            # 2) inspect the generated catalog entry for the same model
            node = catalog['nodes']['model.test.{}'.format(model_name)]
            for column_name, comment in expected_comments.items():
                assert node['columns'][column_name]['comment'] == comment