Skip to content

Commit

Permalink
Fix config change detection not working for multiple sortkey in mater…
Browse files Browse the repository at this point in the history
…ialized views (#841)

* fix config change detection not working for multiple sortkey
* add test
* changelog
* fix sort_key_position for non sort_key column

---------

Co-authored-by: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com>
Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com>
  • Loading branch information
3 people authored Aug 9, 2024
1 parent 08e50f9 commit 8bbdbf2
Show file tree
Hide file tree
Showing 6 changed files with 256 additions and 22 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20240610-104114.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Fix config change detection not working for multiple sortkey in materialized views
time: 2024-06-10T10:41:14.721411-03:00
custom:
Author: lvitti
Issue: "838"
18 changes: 13 additions & 5 deletions dbt/adapters/redshift/relation_configs/materialized_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,13 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict:
"query": agate.Table(
agate.Row({"definition": "<query>")}
),
"columns": agate.Table(
agate.Row({
"column": "<column_name>",
"sort_key_position": <int>,
"is_dist_key: any(true, false),
})
),
}
Additional columns in either value is fine, as long as `sortkey` and `sortstyle` are available.
Expand Down Expand Up @@ -199,11 +206,12 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict:
{"dist": RedshiftDistConfig.parse_relation_results(materialized_view)}
)

# TODO: this only shows the first column in the sort key
if materialized_view.get("sortkey1"):
config_dict.update(
{"sort": RedshiftSortConfig.parse_relation_results(materialized_view)}
)
if columns := relation_results.get("columns"):
sort_columns = [row for row in columns.rows if row.get("sort_key_position", 0) > 0]
if sort_columns:
config_dict.update(
{"sort": RedshiftSortConfig.parse_relation_results(sort_columns)}
)

return config_dict

Expand Down
37 changes: 22 additions & 15 deletions dbt/adapters/redshift/relation_configs/sort.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from dataclasses import dataclass
from dbt.adapters.contracts.relation import RelationConfig
from typing import Optional, FrozenSet, Set, Dict, Any, TYPE_CHECKING
from typing import Optional, Set, Dict, Any, TYPE_CHECKING, Tuple

from dbt.adapters.relation_configs import (
RelationConfigChange,
Expand Down Expand Up @@ -46,7 +46,7 @@ class RedshiftSortConfig(RedshiftRelationConfigBase, RelationConfigValidationMix
"""

sortstyle: Optional[RedshiftSortStyle] = None
sortkey: Optional[FrozenSet[str]] = None
sortkey: Optional[Tuple[str]] = None

def __post_init__(self):
# maintains `frozen=True` while allowing for a variable default on `sort_type`
Expand Down Expand Up @@ -103,7 +103,7 @@ def validation_rules(self) -> Set[RelationConfigValidationRule]:
def from_dict(cls, config_dict) -> Self:
kwargs_dict = {
"sortstyle": config_dict.get("sortstyle"),
"sortkey": frozenset(column for column in config_dict.get("sortkey", {})),
"sortkey": tuple(column for column in config_dict.get("sortkey", {})),
}
sort: Self = super().from_dict(kwargs_dict) # type: ignore
return sort # type: ignore
Expand Down Expand Up @@ -133,12 +133,12 @@ def parse_relation_config(cls, relation_config: RelationConfig) -> Dict[str, Any
if isinstance(sortkey, str):
sortkey = [sortkey]

config_dict.update({"sortkey": set(sortkey)})
config_dict.update({"sortkey": tuple(sortkey)})

return config_dict

@classmethod
def parse_relation_results(cls, relation_results_entry: "agate.Row") -> dict:
def parse_relation_results(cls, relation_results_entry: "agate.MappedSequence") -> dict:
"""
Translate agate objects from the database into a standard dictionary.
Expand All @@ -147,19 +147,26 @@ def parse_relation_results(cls, relation_results_entry: "agate.Row") -> dict:
Processing of `sortstyle` has been omitted here, which means it's the default (compound).
Args:
relation_results_entry: the description of the sortkey and sortstyle from the database in this format:
agate.Row({
...,
"sortkey1": "<column_name>",
...
})
relation_results_entry: The list of rows that contains the sortkey in this format:
[
agate.Row({
...,
"column": "<column_name>",
"sort_key_position": <int>,
...
}),
]
Returns: a standard dictionary describing this `RedshiftSortConfig` instance
"""
if sortkey := relation_results_entry.get("sortkey1"):
return {"sortkey": {sortkey}}
return {}
sort_config = []

sorted_columns = sorted(relation_results_entry, key=lambda x: x["sort_key_position"])
for column in sorted_columns:
if column.get("sort_key_position"):
sort_config.append(column.get("column"))

return {"sortkey": sort_config}


@dataclass(frozen=True, eq=True, unsafe_hash=True)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,20 @@
{%- endset %}
{% set _materialized_view = run_query(_materialized_view_sql) %}

{%- set _column_descriptor_sql -%}
SELECT
a.attname as column,
a.attisdistkey as is_dist_key,
a.attsortkeyord as sort_key_position
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
JOIN pg_attribute a ON a.attrelid = c.oid
WHERE
n.nspname ilike '{{ relation.schema }}'
AND c.relname LIKE 'mv_tbl__{{ relation.identifier }}__%'
{%- endset %}
{% set _column_descriptor = run_query(_column_descriptor_sql) %}

{%- set _query_sql -%}
select
vw.definition
Expand All @@ -34,6 +48,10 @@
{%- endset %}
{% set _query = run_query(_query_sql) %}

{% do return({'materialized_view': _materialized_view, 'query': _query}) %}
{% do return({
'materialized_view': _materialized_view,
'query': _query,
'columns': _column_descriptor,
})%}

{% endmacro %}
55 changes: 55 additions & 0 deletions tests/unit/test_materialized_view.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from unittest.mock import Mock

import agate
import pytest

from dbt.adapters.redshift.relation_configs import RedshiftMaterializedViewConfig
Expand Down Expand Up @@ -53,3 +54,57 @@ def test_redshift_materialized_view_config_throws_expected_exception_with_invali
)
with pytest.raises(ValueError):
config.parse_relation_config(model_node)


def test_redshift_materialized_view_parse_relation_results_handles_multiples_sort_key():
materialized_view = agate.Table.from_object(
[],
[
"database",
"schema",
"table",
"diststyle",
"sortkey1",
"autorefresh",
],
)

column_descriptor = agate.Table.from_object(
[
{
"column": "my_column",
"is_dist_key": True,
"sort_key_position": 1,
},
{
"column": "my_column2",
"is_dist_key": True,
"sort_key_position": 2,
},
{
"column": "my_column5",
"is_dist_key": False,
"sort_key_position": 0,
},
],
)

query = agate.Table.from_object(
[
{
"definition": "create materialized view my_view as (select 1 as my_column, 'value' as my_column2)"
}
]
)

relation_results = {
"materialized_view": materialized_view,
"columns": column_descriptor,
"query": query,
}

config_dict = RedshiftMaterializedViewConfig.parse_relation_results(relation_results)

assert isinstance(config_dict["sort"], dict)
assert config_dict["sort"]["sortkey"][0] == "my_column"
assert config_dict["sort"]["sortkey"][1] == "my_column2"
142 changes: 141 additions & 1 deletion tests/unit/test_relation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
from unittest.mock import Mock

import agate
import pytest

from dbt.adapters.redshift.relation import RedshiftRelation
from dbt.adapters.contracts.relation import RelationType
from dbt.adapters.contracts.relation import (
RelationType,
RelationConfig,
)

from dbt.adapters.redshift.relation_configs.sort import RedshiftSortStyle


def test_renameable_relation():
Expand All @@ -15,3 +25,133 @@ def test_renameable_relation():
RelationType.Table,
}
)


@pytest.fixture
def materialized_view_without_sort_key_from_db():
materialized_view = agate.Table.from_object(
[
{
"database": "my_db",
"schema": "my_schema",
"table": "my_table",
}
],
)

column_descriptor = agate.Table.from_object([])

query = agate.Table.from_object(
[
{
"definition": "create materialized view my_view as (select 1 as my_column, 'value' as my_column2)"
}
]
)

relation_results = {
"materialized_view": materialized_view,
"columns": column_descriptor,
"query": query,
}
return relation_results


@pytest.fixture
def materialized_view_without_sort_key_config():
relation_config = Mock(spec=RelationConfig)

relation_config.database = "my_db"
relation_config.identifier = "my_table"
relation_config.schema = "my_schema"
relation_config.config = Mock()
relation_config.config.extra = {}
relation_config.config.sort = ""
relation_config.compiled_code = (
"create materialized view my_view as (select 1 as my_column, 'value' as my_column2)"
)
return relation_config


@pytest.fixture
def materialized_view_multiple_sort_key_from_db(materialized_view_without_sort_key_from_db):
materialized_view_without_sort_key_from_db["columns"] = agate.Table.from_object(
[
{
"column": "my_column",
"is_dist_key": True,
"sort_key_position": 1,
},
{
"column": "my_column2",
"is_dist_key": True,
"sort_key_position": 2,
},
],
)
return materialized_view_without_sort_key_from_db


@pytest.fixture
def materialized_view_multiple_sort_key_config(materialized_view_without_sort_key_config):
materialized_view_without_sort_key_config.config.extra = {
"sort_type": RedshiftSortStyle.compound,
"sort": ["my_column", "my_column2"],
}

return materialized_view_without_sort_key_config


def test_materialized_view_config_changeset_without_sort_key_empty_changes(
materialized_view_without_sort_key_from_db,
materialized_view_without_sort_key_config,
):
change_set = RedshiftRelation.materialized_view_config_changeset(
materialized_view_without_sort_key_from_db,
materialized_view_without_sort_key_config,
)

assert change_set is None


def test_materialized_view_config_changeset_multiple_sort_key_without_changes(
materialized_view_multiple_sort_key_from_db,
materialized_view_multiple_sort_key_config,
):

change_set = RedshiftRelation.materialized_view_config_changeset(
materialized_view_multiple_sort_key_from_db,
materialized_view_multiple_sort_key_config,
)

assert change_set is None


def test_materialized_view_config_changeset_multiple_sort_key_with_changes(
materialized_view_multiple_sort_key_from_db,
materialized_view_multiple_sort_key_config,
):
materialized_view_multiple_sort_key_config.config.extra["sort"].append("my_column3")

change_set = RedshiftRelation.materialized_view_config_changeset(
materialized_view_multiple_sort_key_from_db,
materialized_view_multiple_sort_key_config,
)

assert change_set is not None
assert change_set.sort.context.sortkey == ("my_column", "my_column2", "my_column3")


def test_materialized_view_config_changeset_multiple_sort_key_with_changes_in_order_column(
materialized_view_multiple_sort_key_from_db,
materialized_view_multiple_sort_key_config,
):
materialized_view_multiple_sort_key_config.config.extra["sort"] = ["my_column2", "my_column"]

change_set = RedshiftRelation.materialized_view_config_changeset(
materialized_view_multiple_sort_key_from_db,
materialized_view_multiple_sort_key_config,
)

assert change_set is not None
assert change_set.sort.context.sortkey == ("my_column2", "my_column")

0 comments on commit 8bbdbf2

Please sign in to comment.