diff --git a/CHANGELOG.md b/CHANGELOG.md
index 011a8f2ef..5b93a5b91 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,11 +1,13 @@
 ## dbt-spark 0.21.0 (Release TBD)
 
 ### Fixes
+- Enhanced the `get_columns_in_relation` method to handle a bug in open source Delta Lake, which doesn't return schema details in the output of the `show table extended in databasename like '*'` query. This impacts dbt snapshots when the file format is open source Delta Lake ([#207](https://github.com/dbt-labs/dbt-spark/pull/207))
 - Add pyodbc import error message to dbt.exceptions.RuntimeException to get more detailed information when running `dbt debug` ([#192](https://github.com/dbt-labs/dbt-spark/pull/192))
 - Add support for ODBC Server Side Parameters, allowing options that need to be set with the `SET` statement to be used ([#201](https://github.com/dbt-labs/dbt-spark/pull/201))
 - Add `retry_all` configuration setting to retry all connection issues, not just when the `_is_retryable_error` function determines ([#194](https://github.com/dbt-labs/dbt-spark/pull/194))
 
 ### Contributors
+- [@harryharanb](https://github.com/harryharanb) ([#207](https://github.com/dbt-labs/dbt-spark/pull/207))
 - [@JCZuurmond](https://github.com/JCZuurmond) ([#192](https://github.com/fishtown-analytics/dbt-spark/pull/192))
 - [@jethron](https://github.com/jethron) ([#201](https://github.com/fishtown-analytics/dbt-spark/pull/201))
 - [@gregingenii](https://github.com/gregingenii) ([#194](https://github.com/dbt-labs/dbt-spark/pull/194))
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index f8e72449a..03fba9fac 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -212,11 +212,16 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]:
                                 for cached_relation in cached_relations
                                 if str(cached_relation) == str(relation)),
                                None)
-        if cached_relation is None or cached_relation.information is None:
+        columns = []
+        if cached_relation and cached_relation.information:
+            columns = self.parse_columns_from_information(cached_relation)
+        if not columns:
+            # In open source Delta Lake, the 'show table extended' query
+            # output doesn't include the relation's schema. If the cached
+            # information yields no columns, fall back to the
+            # get_columns_in_relation Spark macro, which runs a
+            # 'describe extended tablename' query.
             rows: List[agate.Row] = super().get_columns_in_relation(relation)
             columns = self.parse_describe_extended(relation, rows)
-        else:
-            columns = self.parse_columns_from_information(cached_relation)
         return columns
 
     def parse_columns_from_information(
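Below is a minimal, self-contained sketch of the cache-then-fallback pattern the `impl.py` hunk introduces: parse columns from the cached `show table extended` output when possible, but issue the extra `describe extended` query whenever that parse comes back empty (the open source Delta Lake case), not only when the cache entry is missing. This is not the dbt-spark code; `StubRelation`, `StubAdapter`, and their methods are hypothetical stand-ins for illustration only.

```python
from typing import List, Optional


class StubRelation:
    """Hypothetical stand-in for a cached Spark relation."""

    def __init__(self, information: Optional[str]):
        # `information` mimics the raw `show table extended` output.
        self.information = information


class StubAdapter:
    """Hypothetical stand-in for the adapter's column lookup."""

    def parse_columns_from_information(self, relation: StubRelation) -> List[str]:
        # Open source Delta Lake omits the schema section, so parsing the
        # cached output can return an empty list even when `information`
        # is present.
        if "Schema:" not in (relation.information or ""):
            return []
        return ["id", "name"]

    def describe_extended(self) -> List[str]:
        # Stand-in for the `describe extended tablename` round trip.
        return ["id", "name"]

    def get_columns_in_relation(self, cached: Optional[StubRelation]) -> List[str]:
        columns: List[str] = []
        if cached and cached.information:
            columns = self.parse_columns_from_information(cached)
        if not columns:
            # Cache miss *or* empty schema (the Delta Lake gap): run the
            # extra query instead of returning no columns.
            columns = self.describe_extended()
        return columns


adapter = StubAdapter()
assert adapter.get_columns_in_relation(None) == ["id", "name"]                 # no cache entry
assert adapter.get_columns_in_relation(StubRelation("Provider: delta")) == ["id", "name"]  # Delta gap
assert adapter.get_columns_in_relation(StubRelation("Schema: id, name")) == ["id", "name"]  # cache hit
```

The key behavioral change mirrored here is that an empty parse result now triggers the fallback query, whereas previously only a missing cache entry (or missing `information`) did.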