Skip to content

Commit

Permalink
Detect breaking changes to constraints in state:modifed (#7476)
Browse files Browse the repository at this point in the history
* added test that fails

* added new exception

* add partial error checking - needs more specifics

* move contract check under modelnode

* try adding only enforced constraints

* add checks for enforced constraints

* changelog

* add materialization logic

* clean up tests, tweak materializations

* PR feedback

* more PR feedback

* change to tuple
  • Loading branch information
emmyoop authored May 10, 2023
1 parent 29f2cfc commit 05595f5
Show file tree
Hide file tree
Showing 11 changed files with 450 additions and 67 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20230503-101100.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Detect breaking changes to enforced constraints
time: 2023-05-03T10:11:00.617936-05:00
custom:
Author: emmyoop
Issue: "7065"
7 changes: 7 additions & 0 deletions core/dbt/adapters/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,9 @@ def get_include_paths(self, name: Optional[str]) -> List[Path]:
def get_adapter_type_names(self, name: Optional[str]) -> List[str]:
return [p.adapter.type() for p in self.get_adapter_plugins(name)]

def get_adapter_constraint_support(self, name: Optional[str]) -> List[str]:
return self.lookup_adapter(name).CONSTRAINT_SUPPORT # type: ignore


FACTORY: AdapterContainer = AdapterContainer()

Expand Down Expand Up @@ -214,6 +217,10 @@ def get_adapter_type_names(name: Optional[str]) -> List[str]:
return FACTORY.get_adapter_type_names(name)


def get_adapter_constraint_support(name: Optional[str]) -> List[str]:
return FACTORY.get_adapter_constraint_support(name)


@contextmanager
def adapter_management():
reset_adapters()
Expand Down
169 changes: 122 additions & 47 deletions core/dbt/contracts/graph/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ def same_config(self, old) -> bool:
def build_contract_checksum(self):
pass

def same_contract(self, old) -> bool:
def same_contract(self, old, adapter_type=None) -> bool:
# This would only apply to seeds
return True

Expand Down Expand Up @@ -500,13 +500,13 @@ def patch(self, patch: "ParsedNodePatch"):
)
)

def same_contents(self, old) -> bool:
def same_contents(self, old, adapter_type) -> bool:
if old is None:
return False

# Need to ensure that same_contract is called because it
# could throw an error
same_contract = self.same_contract(old)
same_contract = self.same_contract(old, adapter_type)
return (
self.same_body(old)
and self.same_config(old)
Expand Down Expand Up @@ -591,6 +591,46 @@ def depends_on_public_nodes(self):
def depends_on_macros(self):
return self.depends_on.macros


# ====================================
# CompiledNode subclasses
# ====================================


@dataclass
class AnalysisNode(CompiledNode):
resource_type: NodeType = field(metadata={"restrict": [NodeType.Analysis]})


@dataclass
class HookNode(CompiledNode):
resource_type: NodeType = field(metadata={"restrict": [NodeType.Operation]})
index: Optional[int] = None


@dataclass
class ModelNode(CompiledNode):
resource_type: NodeType = field(metadata={"restrict": [NodeType.Model]})
access: AccessType = AccessType.Protected
constraints: List[ModelLevelConstraint] = field(default_factory=list)
version: Optional[NodeVersion] = None
latest_version: Optional[NodeVersion] = None

@property
def is_latest_version(self) -> bool:
return self.version is not None and self.version == self.latest_version

@property
def search_name(self):
if self.version is None:
return self.name
else:
return f"{self.name}.v{self.version}"

@property
def materialization_enforces_constraints(self) -> bool:
return self.config.materialized in ["table", "incremental"]

def build_contract_checksum(self):
# We don't need to construct the checksum if the model does not
# have contract enforced, because it won't be used.
Expand All @@ -603,10 +643,14 @@ def build_contract_checksum(self):
for column in sorted_columns:
contract_state += f"|{column.name}"
contract_state += str(column.data_type)
contract_state += str(column.constraints)
if self.materialization_enforces_constraints:
contract_state += self.config.materialized
contract_state += str(self.constraints)
data = contract_state.encode("utf-8")
self.contract.checksum = hashlib.new("sha256", data).hexdigest()

def same_contract(self, old) -> bool:
def same_contract(self, old, adapter_type=None) -> bool:
# If the contract wasn't previously enforced:
if old.contract.enforced is False and self.contract.enforced is False:
# No change -- same_contract: True
Expand All @@ -627,32 +671,99 @@ def same_contract(self, old) -> bool:
contract_enforced_disabled: bool = False
columns_removed: List[str] = []
column_type_changes: List[Tuple[str, str, str]] = []
enforced_column_constraint_removed: List[Tuple[str, str]] = [] # column, constraint_type
enforced_model_constraint_removed: List[
Tuple[str, List[str]]
] = [] # constraint_type, columns
materialization_changed: List[str] = []

if old.contract.enforced is True and self.contract.enforced is False:
# Breaking change: the contract was previously enforced, and it no longer is
contract_enforced_disabled = True

# TODO: this avoid the circular imports but isn't ideal
from dbt.adapters.factory import get_adapter_constraint_support
from dbt.adapters.base import ConstraintSupport

constraint_support = get_adapter_constraint_support(adapter_type)
column_constraints_exist = False

# Next, compare each column from the previous contract (old.columns)
for key, value in sorted(old.columns.items()):
for old_key, old_value in sorted(old.columns.items()):
# Has this column been removed?
if key not in self.columns.keys():
columns_removed.append(value.name)
if old_key not in self.columns.keys():
columns_removed.append(old_value.name)
# Has this column's data type changed?
elif value.data_type != self.columns[key].data_type:
elif old_value.data_type != self.columns[old_key].data_type:
column_type_changes.append(
(str(value.name), str(value.data_type), str(self.columns[key].data_type))
(
str(old_value.name),
str(old_value.data_type),
str(self.columns[old_key].data_type),
)
)

# track if there are any column level constraints for the materialization check late
if old_value.constraints:
column_constraints_exist = True

# Have enforced columns level constraints changed?
# Constraints are only enforced for table and incremental materializations.
# We only really care if the old node was one of those materializations for breaking changes
if (
old_key in self.columns.keys()
and old_value.constraints != self.columns[old_key].constraints
and old.materialization_enforces_constraints
):

for old_constraint in old_value.constraints:
if (
old_constraint not in self.columns[old_key].constraints
and constraint_support[old_constraint.type] == ConstraintSupport.ENFORCED
):
enforced_column_constraint_removed.append(
(old_key, str(old_constraint.type))
)

# Now compare the model level constraints
if old.constraints != self.constraints and old.materialization_enforces_constraints:
for old_constraint in old.constraints:
if (
old_constraint not in self.constraints
and constraint_support[old_constraint.type] == ConstraintSupport.ENFORCED
):
enforced_model_constraint_removed.append(
(str(old_constraint.type), old_constraint.columns)
)

# Check for relevant materialization changes.
if (
old.materialization_enforces_constraints
and not self.materialization_enforces_constraints
and (old.constraints or column_constraints_exist)
):
materialization_changed = [old.config.materialized, self.config.materialized]

# If a column has been added, it will be missing in the old.columns, and present in self.columns
# That's a change (caught by the different checksums), but not a breaking change

# Did we find any changes that we consider breaking? If so, that's an error
if contract_enforced_disabled or columns_removed or column_type_changes:
if (
contract_enforced_disabled
or columns_removed
or column_type_changes
or enforced_model_constraint_removed
or enforced_column_constraint_removed
or materialization_changed
):
raise (
ContractBreakingChangeError(
contract_enforced_disabled=contract_enforced_disabled,
columns_removed=columns_removed,
column_type_changes=column_type_changes,
enforced_column_constraint_removed=enforced_column_constraint_removed,
enforced_model_constraint_removed=enforced_model_constraint_removed,
materialization_changed=materialization_changed,
node=self,
)
)
Expand All @@ -662,42 +773,6 @@ def same_contract(self, old) -> bool:
return False


# ====================================
# CompiledNode subclasses
# ====================================


@dataclass
class AnalysisNode(CompiledNode):
resource_type: NodeType = field(metadata={"restrict": [NodeType.Analysis]})


@dataclass
class HookNode(CompiledNode):
resource_type: NodeType = field(metadata={"restrict": [NodeType.Operation]})
index: Optional[int] = None


@dataclass
class ModelNode(CompiledNode):
resource_type: NodeType = field(metadata={"restrict": [NodeType.Model]})
access: AccessType = AccessType.Protected
constraints: List[ModelLevelConstraint] = field(default_factory=list)
version: Optional[NodeVersion] = None
latest_version: Optional[NodeVersion] = None

@property
def is_latest_version(self) -> bool:
return self.version is not None and self.version == self.latest_version

@property
def search_name(self):
if self.version is None:
return self.name
else:
return f"{self.name}.v{self.version}"


# TODO: rm?
@dataclass
class RPCNode(CompiledNode):
Expand Down Expand Up @@ -887,7 +962,7 @@ class GenericTestNode(TestShouldStoreFailures, CompiledNode, HasTestMetadata):
config: TestConfig = field(default_factory=TestConfig) # type: ignore
attached_node: Optional[str] = None

def same_contents(self, other) -> bool:
def same_contents(self, other, adapter_type: Optional[str]) -> bool:
if other is None:
return False

Expand Down
35 changes: 33 additions & 2 deletions core/dbt/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import re
import io
import agate
from typing import Any, Dict, List, Mapping, Optional, Union
from typing import Any, Dict, List, Mapping, Optional, Tuple, Union

from dbt.dataclass_schema import ValidationError
from dbt.events.helpers import env_secrets, scrub_secrets
Expand Down Expand Up @@ -212,11 +212,21 @@ class ContractBreakingChangeError(DbtRuntimeError):
MESSAGE = "Breaking Change to Contract"

def __init__(
self, contract_enforced_disabled, columns_removed, column_type_changes, node=None
self,
contract_enforced_disabled: bool,
columns_removed: List[str],
column_type_changes: List[Tuple[str, str, str]],
enforced_column_constraint_removed: List[Tuple[str, str]],
enforced_model_constraint_removed: List[Tuple[str, List[str]]],
materialization_changed: List[str],
node=None,
):
self.contract_enforced_disabled = contract_enforced_disabled
self.columns_removed = columns_removed
self.column_type_changes = column_type_changes
self.enforced_column_constraint_removed = enforced_column_constraint_removed
self.enforced_model_constraint_removed = enforced_model_constraint_removed
self.materialization_changed = materialization_changed
super().__init__(self.message(), node)

@property
Expand All @@ -237,6 +247,27 @@ def message(self):
breaking_changes.append(
f"Columns with data_type changes: \n - {column_type_changes_str}"
)
if self.enforced_column_constraint_removed:
column_constraint_changes_str = "\n - ".join(
[f"{c[0]} ({c[1]})" for c in self.enforced_column_constraint_removed]
)
breaking_changes.append(
f"Enforced column level constraints were removed: \n - {column_constraint_changes_str}"
)
if self.enforced_model_constraint_removed:
model_constraint_changes_str = "\n - ".join(
[f"{c[0]} -> {c[1]}" for c in self.enforced_model_constraint_removed]
)
breaking_changes.append(
f"Enforced model level constraints were removed: \n - {model_constraint_changes_str}"
)
if self.materialization_changed:
materialization_changes_str = "\n - ".join(
f"{self.materialization_changed[0]} -> {self.materialization_changed[1]}"
)
breaking_changes.append(
f"Materialization changed with enforced constraints: \n - {materialization_changes_str}"
)

reasons = "\n\n".join(breaking_changes)

Expand Down
2 changes: 1 addition & 1 deletion core/dbt/graph/selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def can_select_indirectly(node):


class NodeSelector(MethodManager):
"""The node selector is aware of the graph and manifest,"""
"""The node selector is aware of the graph and manifest"""

def __init__(
self,
Expand Down
Loading

0 comments on commit 05595f5

Please sign in to comment.