fix unit tests related to new serialization api #150

Merged: 2 commits, Feb 23, 2021
CHANGELOG.md (8 changes: 7 additions & 1 deletion)
@@ -1,4 +1,10 @@
-## dbt-spark 0.19.0 (Release TBD)
+## dbt-spark 0.19.1b2 (Release TBD)
+
+### Under the hood
+- update serialization calls to use new API in dbt-core `0.19.1b2` ([#150](https://github.com/fishtown-analytics/dbt-spark/pull/150))
+
+
+## dbt-spark 0.19.0 (February 22, 2021)
 
 ### Breaking changes
 - Incremental models have `incremental_strategy: append` by default. This strategy adds new records without updating or overwriting existing records. For that, use `merge` or `insert_overwrite` instead, depending on the file format, connection method, and attributes of your underlying data. dbt will try to raise a helpful error if you configure a strategy that is not supported for a given file format or connection. ([#140](https://github.com/fishtown-analytics/dbt-spark/pull/140), [#141](https://github.com/fishtown-analytics/dbt-spark/pull/141))
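The changelog entry above points at the new serialization surface in dbt-core `0.19.1b2`: instead of hologram-style `to_dict(omit_none=...)`, dataclasses now serialize through `dbtClassMixin.to_dict()` with an options dict. A minimal sketch of the call-pattern change, assuming this branch of dbt-spark and dbt-core `0.19.1b2` are installed (the column values are invented):

```python
from dbt.adapters.spark.column import SparkColumn

col = SparkColumn(column='id', dtype='bigint')

# Old call pattern (dbt-core <= 0.19.0), shown here only as a comment:
#   col.to_dict(omit_none=False)

# New call pattern (dbt-core 0.19.1b2): an options dict with the inverted flag name.
as_dict = col.to_dict(options={'keep_none': True})
print(as_dict['column'], as_dict['dtype'])  # id bigint
```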
dbt/adapters/spark/column.py (11 changes: 6 additions & 5 deletions)
@@ -2,13 +2,14 @@
 from typing import TypeVar, Optional, Dict, Any
 
 from dbt.adapters.base.column import Column
+from dbt.dataclass_schema import dbtClassMixin
 from hologram import JsonDict
 
 Self = TypeVar('Self', bound='SparkColumn')
 
 
 @dataclass
-class SparkColumn(Column):
+class SparkColumn(dbtClassMixin, Column):
     table_database: Optional[str] = None
     table_schema: Optional[str] = None
     table_name: Optional[str] = None
@@ -55,12 +56,12 @@ def convert_table_stats(raw_stats: Optional[str]) -> Dict[str, Any]:
                 table_stats[f'stats:{key}:include'] = True
         return table_stats
 
-    def to_dict(
-        self, omit_none: bool = True, validate: bool = False
+    def to_column_dict(
+        self, keep_none: bool = False, validate: bool = False
     ) -> JsonDict:
-        original_dict = super().to_dict(omit_none=omit_none)
+        original_dict = self.to_dict(options={'keep_none': keep_none})
         # If there are stats, merge them into the root of the dict
-        original_stats = original_dict.pop('table_stats')
+        original_stats = original_dict.pop('table_stats', None)
         if original_stats:
             original_dict.update(original_stats)
         return original_dict
dbt/adapters/spark/connections.py (8 changes: 7 additions & 1 deletion)
@@ -47,7 +47,6 @@ class SparkConnectionMethod(StrEnum):
 class SparkCredentials(Credentials):
     host: str
     method: SparkConnectionMethod
-    schema: str
     database: Optional[str]
     driver: Optional[str] = None
     cluster: Optional[str] = None
@@ -61,6 +60,13 @@ class SparkCredentials(Credentials):
     connect_retries: int = 0
     connect_timeout: int = 10
 
+    @classmethod
+    def __pre_deserialize__(cls, data, options=None):
+        data = super().__pre_deserialize__(data, options=options)
+        if 'database' not in data:
+            data['database'] = None
+        return data
+
     def __post_init__(self):
         # spark classifies database and schema as the same thing
         if (
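The `__pre_deserialize__` classmethod added above is a hook the new dbtClassMixin-based API runs on the raw dict before validation, which lets the credentials class fill in a default `database` when a profile omits it. A standalone sketch of the same pattern; `ExampleCredentials` and its fields are hypothetical, and it assumes dbt-core `0.19.1b2`'s `dbtClassMixin` exposes `from_dict` and this hook signature:

```python
from dataclasses import dataclass
from typing import Optional

from dbt.dataclass_schema import dbtClassMixin


@dataclass
class ExampleCredentials(dbtClassMixin):
    host: str
    database: Optional[str] = None

    @classmethod
    def __pre_deserialize__(cls, data, options=None):
        # Normalize the incoming dict before deserialization/validation runs.
        data = super().__pre_deserialize__(data, options=options)
        if 'database' not in data:
            data['database'] = None
        return data


creds = ExampleCredentials.from_dict({'host': 'localhost'})
print(creds.database)  # None
```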
dbt/adapters/spark/impl.py (2 changes: 1 addition & 1 deletion)
@@ -211,7 +211,7 @@ def _get_columns_for_catalog(
             if owner:
                 column.table_owner = owner
             # convert SparkColumns into catalog dicts
-            as_dict = column.to_dict()
+            as_dict = column.to_column_dict()
             as_dict['column_name'] = as_dict.pop('column', None)
             as_dict['column_type'] = as_dict.pop('dtype')
             as_dict['table_database'] = None
test/unit/test_adapter.py (8 changes: 4 additions & 4 deletions)
@@ -265,7 +265,7 @@ def test_parse_relation(self):
         rows = SparkAdapter(config).parse_describe_extended(
             relation, input_cols)
         self.assertEqual(len(rows), 3)
-        self.assertEqual(rows[0].to_dict(omit_none=False), {
+        self.assertEqual(rows[0].to_column_dict(keep_none=True), {
             'table_database': None,
             'table_schema': relation.schema,
             'table_name': relation.name,
@@ -279,7 +279,7 @@
             'char_size': None
         })
 
-        self.assertEqual(rows[1].to_dict(omit_none=False), {
+        self.assertEqual(rows[1].to_column_dict(keep_none=True), {
             'table_database': None,
             'table_schema': relation.schema,
             'table_name': relation.name,
@@ -293,7 +293,7 @@
             'char_size': None
         })
 
-        self.assertEqual(rows[2].to_dict(omit_none=False), {
+        self.assertEqual(rows[2].to_column_dict(keep_none=True), {
             'table_database': None,
             'table_schema': relation.schema,
             'table_name': relation.name,
@@ -345,7 +345,7 @@ def test_parse_relation_with_statistics(self):
         rows = SparkAdapter(config).parse_describe_extended(
             relation, input_cols)
         self.assertEqual(len(rows), 1)
-        self.assertEqual(rows[0].to_dict(omit_none=False), {
+        self.assertEqual(rows[0].to_column_dict(keep_none=True), {
             'table_database': None,
             'table_schema': relation.schema,
             'table_name': relation.name,
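In the tests, `omit_none=False` becomes `keep_none=True`: the flag name is inverted but the intent is the same, keeping None-valued fields in the dict so the assertions can check keys such as `'table_database': None`. A small sketch of the difference, assuming this branch's `SparkColumn` (the column values are invented):

```python
from dbt.adapters.spark.column import SparkColumn

row = SparkColumn(column='col1', dtype='string')

with_nones = row.to_column_dict(keep_none=True)
print(with_nones['table_database'])        # None is kept, as the assertions expect

without_nones = row.to_column_dict()       # keep_none defaults to False
print('table_database' in without_nones)   # False: None-valued keys are dropped
```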