From 7882d9c3efc233bff15af4c79090d1ae3982ef79 Mon Sep 17 00:00:00 2001
From: Hariharan Banukumar
Date: Sun, 22 Aug 2021 18:21:18 -0400
Subject: [PATCH 1/2] Fix get_columns_in_relation for open source Delta tables

---
 dbt/adapters/spark/impl.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index f8e72449a..31737bbde 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -212,11 +212,15 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]:
                                 for cached_relation in cached_relations
                                 if str(cached_relation) == str(relation)), None)
 
-        if cached_relation is None or cached_relation.information is None:
+        columns = []
+        if cached_relation and cached_relation.information:
+            columns = self.parse_columns_from_information(cached_relation)
+        if not columns:
+            # In open source Delta, the 'show table extended' query output doesn't return the relation's schema.
+            # If columns are empty from the cache, use the get_columns_in_relation Spark macro,
+            # which executes a 'describe extended' query against the relation.
             rows: List[agate.Row] = super().get_columns_in_relation(relation)
             columns = self.parse_describe_extended(relation, rows)
-        else:
-            columns = self.parse_columns_from_information(cached_relation)
         return columns
 
     def parse_columns_from_information(

From 6377ffb66264ca555999deb07803410f42350b66 Mon Sep 17 00:00:00 2001
From: Hariharan Banukumar
Date: Sun, 22 Aug 2021 18:42:00 -0400
Subject: [PATCH 2/2] Fix E501 linting error and add changelog entry

---
 CHANGELOG.md               | 2 ++
 dbt/adapters/spark/impl.py | 5 +++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 011a8f2ef..5b93a5b91 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,11 +1,13 @@
 ## dbt-spark 0.21.0 (Release TBD)
 
 ### Fixes
+- Enhance `get_columns_in_relation` to handle a bug in open source Delta Lake, which doesn't return schema details in the output of `show table extended in databasename like '*'`. This impacts dbt snapshots when the file format is open source Delta Lake ([#207](https://github.com/dbt-labs/dbt-spark/pull/207))
 - Add pyodbc import error message to dbt.exceptions.RuntimeException to get more detailed information when running `dbt debug` ([#192](https://github.com/dbt-labs/dbt-spark/pull/192))
 - Add support for ODBC Server Side Parameters, allowing options that need to be set with the `SET` statement to be used ([#201](https://github.com/dbt-labs/dbt-spark/pull/201))
 - Add `retry_all` configuration setting to retry all connection issues, not just when the `_is_retryable_error` function determines ([#194](https://github.com/dbt-labs/dbt-spark/pull/194))
 
 ### Contributors
+- [@harryharanb](https://github.com/harryharanb) ([#207](https://github.com/dbt-labs/dbt-spark/pull/207))
 - [@JCZuurmond](https://github.com/JCZuurmond) ([#192](https://github.com/fishtown-analytics/dbt-spark/pull/192))
 - [@jethron](https://github.com/jethron) ([#201](https://github.com/fishtown-analytics/dbt-spark/pull/201))
 - [@gregingenii](https://github.com/gregingenii) ([#194](https://github.com/dbt-labs/dbt-spark/pull/194))

diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 31737bbde..03fba9fac 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -216,8 +216,9 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]:
         if cached_relation and cached_relation.information:
             columns = self.parse_columns_from_information(cached_relation)
         if not columns:
-            # In open source Delta, the 'show table extended' query output doesn't return the relation's schema.
-            # If columns are empty from the cache, use the get_columns_in_relation Spark macro,
+            # In open source Delta, the 'show table extended' query output
+            # doesn't return the relation's schema. If columns are empty
+            # from the cache, use the get_columns_in_relation Spark macro,
             # which executes a 'describe extended' query against the relation.
             rows: List[agate.Row] = super().get_columns_in_relation(relation)
             columns = self.parse_describe_extended(relation, rows)
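Reviewer note: the sketch below distills the column-lookup order these two commits land — read from the relation cache first, and fall back to a 'describe extended' style lookup when the cache yields no columns (the open source Delta case). It is a standalone illustration under assumed names: CachedRelation, parse_columns_from_information, and describe_extended here are simplified stand-ins, not the dbt-spark adapter's real types or macros.

from dataclasses import dataclass
from typing import Dict, List, Optional


@dataclass
class CachedRelation:
    """Stand-in for a cached relation and its 'show table extended' output."""
    name: str
    information: Optional[str]


def parse_columns_from_information(info: str) -> List[str]:
    # Stand-in parser. Open source Delta leaves the schema section of
    # 'show table extended' empty, so this can legitimately return [].
    return [line.split(":", 1)[1].strip()
            for line in info.splitlines() if line.startswith("col:")]


def describe_extended(name: str) -> List[str]:
    # Stand-in for the per-relation 'describe extended <name>' query that
    # the get_columns_in_relation Spark macro would run.
    return ["id", "updated_at"]


def get_columns(relation: str, cache: Dict[str, CachedRelation]) -> List[str]:
    cached = cache.get(relation)
    columns: List[str] = []
    if cached and cached.information:
        columns = parse_columns_from_information(cached.information)
    if not columns:
        # Cache miss, empty information, or an unparsable schema (the open
        # source Delta case): fall back to the slower per-relation query.
        columns = describe_extended(relation)
    return columns


if __name__ == "__main__":
    # A cached entry whose 'information' lacks a schema, as open source Delta
    # produces it: the fallback path is taken.
    cache = {"db.events": CachedRelation("db.events", "Database: db\n")}
    print(get_columns("db.events", cache))  # ['id', 'updated_at']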