Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add functionality to persist descriptions to nested columns in bigquery #2550

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,12 @@
- `dbt deps` now respects the `--project-dir` flag, so using `dbt deps --project-dir=/some/path` and then `dbt run --project-dir=/some/path` will properly find dependencies ([#2519](https://github.com/fishtown-analytics/dbt/issues/2519), [#2534](https://github.com/fishtown-analytics/dbt/pull/2534))
- `packages.yml` revision/version fields can be float-like again (`revision: '1.0'` is valid). ([#2518](https://github.com/fishtown-analytics/dbt/issues/2518), [#2535](https://github.com/fishtown-analytics/dbt/pull/2535))
- Parallel RPC requests no longer step on each others' arguments ([#2484](https://github.com/fishtown-analytics/dbt/issues/2484), [#2554](https://github.com/fishtown-analytics/dbt/pull/2554))
- `persist_docs` now takes into account descriptions for nested columns in bigquery ([#2549](https://github.com/fishtown-analytics/dbt/issues/2549), [#2550](https://github.com/fishtown-analytics/dbt/pull/2550))
- On windows (depending upon OS support), dbt no longer fails with errors when writing artifacts ([#2558](https://github.com/fishtown-analytics/dbt/issues/2558), [#2566](https://github.com/fishtown-analytics/dbt/pull/2566))

Contributors:
- [@bodschut](https://github.com/bodschut) ([#2550](https://github.com/fishtown-analytics/dbt/pull/2550))

## dbt 0.17.0 (June 08, 2020)

### Fixes
Expand Down
44 changes: 37 additions & 7 deletions plugins/bigquery/dbt/adapters/bigquery/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,36 @@ def get_table_ref_from_relation(self, conn, relation):
relation.identifier,
conn)

def _update_column_dict(self, bq_column_dict, dbt_columns, parent=''):
"""
Helper function to recursively traverse the schema of a table in the
update_column_descriptions function below.

bq_column_dict should be a dict as obtained by the to_api_repr()
function of a SchemaField object.
"""
if parent:
dotted_column_name = '{}.{}'.format(parent, bq_column_dict['name'])
else:
dotted_column_name = bq_column_dict['name']

if dotted_column_name in dbt_columns:
column_config = dbt_columns[dotted_column_name]
bq_column_dict['description'] = column_config.get('description')

new_fields = []
for child_col_dict in bq_column_dict.get('fields', list()):
new_child_column_dict = self._update_column_dict(
child_col_dict,
dbt_columns,
parent=dotted_column_name
)
new_fields.append(new_child_column_dict)

bq_column_dict['fields'] = new_fields

return bq_column_dict

@available.parse_none
def update_column_descriptions(self, relation, columns):
if len(columns) == 0:
Expand All @@ -574,13 +604,13 @@ def update_column_descriptions(self, relation, columns):
table = conn.handle.get_table(table_ref)

new_schema = []
for column in table.schema:
if column.name in columns:
column_config = columns[column.name]
column_dict = column.to_api_repr()
column_dict['description'] = column_config.get('description')
column = SchemaField.from_api_repr(column_dict)
new_schema.append(column)
for bq_column in table.schema:
bq_column_dict = bq_column.to_api_repr()
new_bq_column_dict = self._update_column_dict(
bq_column_dict,
columns
)
new_schema.append(SchemaField.from_api_repr(new_bq_column_dict))

new_table = google.cloud.bigquery.Table(table_ref, schema=new_schema)
conn.handle.update_table(new_table, ['schema'])
Expand Down
5 changes: 2 additions & 3 deletions plugins/bigquery/dbt/include/bigquery/macros/catalog.sql
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@
-- use the "real" column name from the paths query below
column_name as base_column_name,
ordinal_position as column_index,
cast(null as string) as column_comment,

is_partitioning_column,
clustering_ordinal_position
Expand All @@ -99,10 +98,10 @@
concat(table_catalog, '.', table_schema, '.', table_name) as relation_id,
field_path as column_name,
data_type as column_type,
column_name as base_column_name
column_name as base_column_name,
description as column_comment

from {{ information_schema.replace(information_schema_view='COLUMN_FIELD_PATHS') }}
where data_type not like 'STRUCT%'

),

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ select
3 as field_3,

struct(
4 as field_4,
5 as field_5
5 as field_5,
6 as field_6
) as nested_field
12 changes: 9 additions & 3 deletions test/integration/029_docs_generate_tests/test_docs_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -619,17 +619,23 @@ def expected_bigquery_complex_catalog(self):
'type': 'INT64',
'comment': None
},
'nested_field.field_4': {
'name': 'nested_field.field_4',
'nested_field': {
'name': 'nested_field',
'index': 4,
'type': 'INT64',
'type': 'STRUCT<field_5 INT64, field_6 INT64>',
'comment': None
},
'nested_field.field_5': {
'name': 'nested_field.field_5',
'index': 5,
'type': 'INT64',
'comment': None
},
'nested_field.field_6': {
'name': 'nested_field.field_6',
'index': 6,
'type': 'INT64',
'comment': None
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# dbt schema file for the nested persist_docs integration-test models.
# Dotted column names (e.g. `level_1.level_2`) address nested STRUCT
# fields; each description here should be persisted to BigQuery when
# persist_docs is enabled for columns.
version: 2

models:
  - name: table_model_nested
    columns:
      - name: level_1
        description: level_1 column description
      - name: level_1.level_2
        description: level_2 column description
      - name: level_1.level_2.level_3_a
        description: level_3 column description
  - name: view_model_nested
    columns:
      - name: level_1
        description: level_1 column description
      - name: level_1.level_2
        description: level_2 column description
      - name: level_1.level_2.level_3_a
        description: level_3 column description
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Test fixture: a table with a doubly-nested STRUCT column so that
-- persist_docs can be verified on nested field descriptions (the
-- descriptions live in this directory's schema.yml).
{{ config(materialized='table') }}
SELECT
    STRUCT(
        STRUCT(
            1 AS level_3_a,
            2 AS level_3_b
        ) AS level_2
    ) AS level_1
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Test fixture: a view with a doubly-nested STRUCT column so that
-- persist_docs can be verified on nested field descriptions (the
-- descriptions live in this directory's schema.yml).
{{ config(materialized='view') }}
SELECT
    STRUCT(
        STRUCT(
            1 AS level_3_a,
            2 AS level_3_b
        ) AS level_2
    ) AS level_1
79 changes: 77 additions & 2 deletions test/integration/060_persist_docs_tests/test_persist_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,15 @@ def _assert_has_table_comments(self, table_node):
assert table_id_comment.startswith('id Column description')

table_name_comment = table_node['columns']['name']['comment']
assert table_name_comment.startswith('Some stuff here and then a call to')
assert table_name_comment.startswith(
'Some stuff here and then a call to')

self._assert_common_comments(
table_comment, table_id_comment, table_name_comment
)

def _assert_has_view_comments(self, view_node, has_node_comments=True, has_column_comments=True):
def _assert_has_view_comments(self, view_node, has_node_comments=True,
has_column_comments=True):
view_comment = view_node['metadata']['comment']
if has_node_comments:
assert view_comment.startswith('View model description')
Expand Down Expand Up @@ -156,3 +158,76 @@ def test_snowflake_persist_docs(self):
@use_profile('bigquery')
def test_bigquery_persist_docs(self):
    # Smoke test: a plain `dbt run` must succeed on BigQuery —
    # presumably with persist_docs enabled via the project config
    # defined elsewhere in this class/file (not visible here; confirm).
    self.run_dbt()


class TestPersistDocsNested(BasePersistDocsTest):
    """Verify that persist_docs writes descriptions for nested (STRUCT)
    columns on BigQuery, both on the relation itself and in the catalog.
    """

    @property
    def project_config(self):
        # Enable persist_docs for both relation and column comments.
        return {
            'config-version': 2,
            'models': {
                'test': {
                    '+persist_docs': {
                        "relation": True,
                        "columns": True,
                    },
                }
            }
        }

    @property
    def models(self):
        return 'models-bigquery-nested'

    @use_profile('bigquery')
    def test_bigquery_persist_docs(self):
        """
        Run dbt, then use the BigQuery client from the adapter to check
        that the column descriptions were persisted on the nested test
        models (table and view).

        Next, generate the catalog and check that the comments show up
        there as well.
        """
        self.run_dbt()

        self.run_dbt(['docs', 'generate'])
        with open('target/catalog.json') as f:
            catalog = json.load(f)
        assert 'nodes' in catalog
        # exactly one table model and one view model
        assert len(catalog['nodes']) == 2

        expected_comments = {
            'level_1': 'level_1 column description',
            'level_1.level_2': 'level_2 column description',
            'level_1.level_2.level_3_a': 'level_3 column description',
        }

        for model_name in ['table_model_nested', 'view_model_nested']:
            # 1) inspect the live relation's schema through the API
            with self.adapter.connection_named('_test'):
                handle = self.adapter.connections \
                    .get_thread_connection().handle

                table_id = '{}.{}.{}'.format(
                    self.default_database,
                    self.unique_schema(),
                    model_name,
                )
                # Walk down the first field of each nesting level and
                # check its persisted description.
                field = handle.get_table(table_id).schema[0]
                for level in (1, 2, 3):
                    assert field.description == \
                        'level_{} column description'.format(level)
                    if field.fields:
                        field = field.fields[0]

            # 2) inspect the generated catalog entry for the same model
            node = catalog['nodes']['model.test.{}'.format(model_name)]
            for column_name, comment in expected_comments.items():
                assert node['columns'][column_name]['comment'] == comment