From 572cdfaac5768f2ef58413ad73cc857943122bef Mon Sep 17 00:00:00 2001 From: Scott Arbeitman Date: Mon, 12 Apr 2021 21:02:26 +1000 Subject: [PATCH 1/5] Cast table_owner to str Having an integer table_owner causes problems with dbt docs generate --- dbt/adapters/spark/impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index d28ad71a7..a12e7a940 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -180,7 +180,7 @@ def parse_describe_extended( table_schema=relation.schema, table_name=relation.name, table_type=relation.type, - table_owner=metadata.get(KEY_TABLE_OWNER), + table_owner=str(metadata.get(KEY_TABLE_OWNER)), table_stats=table_stats, column=column['col_name'], column_index=idx, From bc801b9e0315144a8d7647c090d380cba5d7a016 Mon Sep 17 00:00:00 2001 From: Scott Arbeitman Date: Mon, 12 Apr 2021 21:15:41 +1000 Subject: [PATCH 2/5] Add fix to Changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d51b5125c..bae4e0c52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## dbt next + +### Fixes + +- Cast `table_owner` to string to avoid errors generating docs ([#159](https://github.com/fishtown-analytics/dbt-spark/pull/159)) + ## dbt-spark 0.19.1 (Release TBD) ## dbt-spark 0.19.1b2 (February 26, 2021) From bcfbcbc7c01d201bb51c71ea0575d1adced4c7b1 Mon Sep 17 00:00:00 2001 From: Scott Arbeitman Date: Mon, 12 Apr 2021 21:42:05 +1000 Subject: [PATCH 3/5] Test conversion to string for owner --- test/unit/test_adapter.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index 0092e131f..2a720af75 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -307,6 +307,35 @@ def test_parse_relation(self): 'char_size': None }) + def test_parse_relation_with_integer_owner(self): + self.maxDiff = None + 
rel_type = SparkRelation.get_relation_type.Table + + relation = SparkRelation.create( + schema='default_schema', + identifier='mytable', + type=rel_type + ) + assert relation.database is None + + # Mimics the output of Spark with a DESCRIBE TABLE EXTENDED + plain_rows = [ + ('col1', 'decimal(22,0)'), + ('# Detailed Table Information', None), + ('Owner', 1234), + ] + + input_cols = [Row(keys=['col_name', 'data_type'], values=r) + for r in plain_rows] + + config = self._get_target_http(self.project_cfg) + rows = SparkAdapter(config).parse_describe_extended( + relation, input_cols) + + self.assertEqual(rows[0].to_column_dict()['table_owner'], '1234') + self.assertEqual(rows[1].to_column_dict()['table_owner'], '1234') + self.assertEqual(rows[2].to_column_dict()['table_owner'], '1234') + def test_parse_relation_with_statistics(self): self.maxDiff = None rel_type = SparkRelation.get_relation_type.Table From 00874ce4b777dae18b9c4dff3a8d2a9199e9e45c Mon Sep 17 00:00:00 2001 From: Scott Arbeitman Date: Mon, 12 Apr 2021 21:42:38 +1000 Subject: [PATCH 4/5] Remove trailing comma --- test/unit/test_adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index 2a720af75..54831671b 100644 --- a/test/unit/test_adapter.py +++ b/test/unit/test_adapter.py @@ -322,7 +322,7 @@ def test_parse_relation_with_integer_owner(self): plain_rows = [ ('col1', 'decimal(22,0)'), ('# Detailed Table Information', None), - ('Owner', 1234), + ('Owner', 1234) ] input_cols = [Row(keys=['col_name', 'data_type'], values=r) From 818d43c6d6bdbcbc428dc1bffb9efbaeebda2da0 Mon Sep 17 00:00:00 2001 From: Scott Arbeitman Date: Mon, 12 Apr 2021 21:46:35 +1000 Subject: [PATCH 5/5] Use get and limit rows checked --- test/unit/test_adapter.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/unit/test_adapter.py b/test/unit/test_adapter.py index 54831671b..5e50e3100 100644 --- a/test/unit/test_adapter.py +++ 
b/test/unit/test_adapter.py @@ -332,9 +332,7 @@ def test_parse_relation_with_integer_owner(self): rows = SparkAdapter(config).parse_describe_extended( relation, input_cols) - self.assertEqual(rows[0].to_column_dict()['table_owner'], '1234') - self.assertEqual(rows[1].to_column_dict()['table_owner'], '1234') - self.assertEqual(rows[2].to_column_dict()['table_owner'], '1234') + self.assertEqual(rows[0].to_column_dict().get('table_owner'), '1234') def test_parse_relation_with_statistics(self): self.maxDiff = None