dbt-labs · jtcohen6 · Apr 12, 2023 · Apr 12, 2023 · Apr 12, 2023 · Apr 12, 2023
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Adding a new column is not a breaking contract change
+time: 2023-04-12T13:34:38.231881+02:00
+custom:
+  Author: jtcohen6
+  Issue: "7332"
@@ -39,7 +39,7 @@
 )
 from dbt.contracts.util import Replaceable, AdditionalPropertiesMixin
 from dbt.events.functions import warn_or_error
-from dbt.exceptions import ParsingError, InvalidAccessTypeError, ModelContractError
+from dbt.exceptions import ParsingError, InvalidAccessTypeError, ContractBreakingChangeError
 from dbt.events.types import (
     SeedIncreased,
     SeedExceedsLimitSamePath,
@@ -539,29 +539,61 @@ def build_contract_checksum(self):
             self.contract.checksum = hashlib.new("sha256", data).hexdigest()
 
     def same_contract(self, old) -> bool:
+        # If the contract wasn't previously enforced
         if old.contract.enforced is False and self.contract.enforced is False:
             # Not a change
             return True
         if old.contract.enforced is False and self.contract.enforced is True:
             # A change, but not a breaking change
             return False
 
-        breaking_change_reasons = []
+        # Otherwise: the contract was previously enforced
+        self.build_contract_checksum()
+
+        # If the checksums match up, the contract has not changed, so same_contract: True
+        if self.contract.checksum == old.contract.checksum:
+            return True
+
+        # The checksums don't match up, so there has been a change.
+        # We need to determine if it's a **breaking** change.
+        # These are the categories of breaking changes:
+        contract_enforced_disabled: bool = False
+        columns_removed: List[str] = []
+        column_type_changes: List[Tuple[str, str, str]] = []
+
         if old.contract.enforced is True and self.contract.enforced is False:
-            # Breaking change: throw an error
-            # Note: we don't have contract.checksum for current node, so build
+            # Breaking change: the contract was previously enforced, and it no longer is
+            # NOTE(review): build_contract_checksum() was already called above; looks redundant
             self.build_contract_checksum()
-            breaking_change_reasons.append("contract has been disabled")
-
-        if self.contract.checksum != old.contract.checksum:
-            # Breaking change, throw error
-            breaking_change_reasons.append("column definitions have changed")
+            contract_enforced_disabled = True
+
+        for key, value in sorted(old.columns.items()):
+            # Has this column been removed?
+            if key not in self.columns.keys():
+                columns_removed.append(value.name)
+            # Has this column's data type changed?
+            elif value.data_type != self.columns[key].data_type:
+                column_type_changes.append(
+                    (str(value.name), str(value.data_type), str(self.columns[key].data_type))
+                )
+            # Otherwise, this column still exists with the same data_type -- not breaking
+            else:
+                continue
+
+        # Did we find any changes that we consider breaking? If so, throw an error
+        if contract_enforced_disabled or columns_removed or column_type_changes:
+            raise (
+                ContractBreakingChangeError(
+                    contract_enforced_disabled=contract_enforced_disabled,
+                    columns_removed=columns_removed,
+                    column_type_changes=column_type_changes,
+                    node=self,
+                )
+            )
 
-        if breaking_change_reasons:
-            raise (ModelContractError(reasons=" and ".join(breaking_change_reasons), node=self))
+        # Otherwise, the contract has still changed, so same_contract: False
         else:
-            # no breaking changes
-            return True
+            return False
 
 
 # ====================================

@@ -207,22 +207,44 @@ def _fix_dupe_msg(self, path_1: str, path_2: str, name: str, type_name: str) ->
             )
 
 
-class ModelContractError(DbtRuntimeError):
+class ContractBreakingChangeError(DbtRuntimeError):
     CODE = 10016
-    MESSAGE = "Contract Error"
+    MESSAGE = "Breaking Change to Contract"
 
-    def __init__(self, reasons, node=None):
-        self.reasons = reasons
+    def __init__(
+        self, contract_enforced_disabled, columns_removed, column_type_changes, node=None
+    ):
+        self.contract_enforced_disabled = contract_enforced_disabled
+        self.columns_removed = columns_removed
+        self.column_type_changes = column_type_changes
         super().__init__(self.message(), node)
 
     @property
     def type(self):
-        return "Contract"
+        return "Breaking Change to Contract"
 
     def message(self):
+        breaking_changes = []
+        if self.contract_enforced_disabled:
+            breaking_changes.append("The contract's enforcement has been disabled.")
+        if self.columns_removed:
+            columns_removed_str = "\n  - ".join(self.columns_removed)
+            breaking_changes.append(f"Columns were removed: \n - {columns_removed_str}")
+        if self.column_type_changes:
+            column_type_changes_str = "\n  - ".join(
+                [f"{c[0]} ({c[1]} -> {c[2]})" for c in self.column_type_changes]
+            )
+            breaking_changes.append(
+                f"Columns with data_type changes: \n - {column_type_changes_str}"
+            )
+
+        reasons = "\n\n".join(breaking_changes)
+
         return (
-            f"There is a breaking change in the model contract because {self.reasons}; "
-            "you may need to create a new version. See: https://docs.getdbt.com/docs/collaborate/publish/model-versions"
+            "While comparing to previous project state, dbt detected a breaking change to an enforced contract."
+            f"\n\n{reasons}\n\n"
+            "Consider making an additive (non-breaking) change instead, if possible.\n"
+            "Otherwise, create a new model version: https://docs.getdbt.com/docs/collaborate/publish/model-versions"
         )
 
 

@@ -7,7 +7,7 @@
 
 from dbt.tests.util import run_dbt, update_config_file, write_file, get_manifest
 
-from dbt.exceptions import CompilationError, ModelContractError
+from dbt.exceptions import CompilationError, ContractBreakingChangeError
 
 from tests.functional.defer_state.fixtures import (
     seed_csv,
@@ -302,12 +302,12 @@ def test_changed_contract(self, project):
         second_contract_checksum = model.contract.checksum
         # double check different contract_checksums
         assert first_contract_checksum != second_contract_checksum
-        with pytest.raises(ModelContractError):
+        with pytest.raises(ContractBreakingChangeError):
             results = run_dbt(["run", "--models", "state:modified.contract", "--state", "./state"])
 
         # Go back to schema file without contract. Should raise an error.
         write_file(schema_yml, "models", "schema.yml")
-        with pytest.raises(ModelContractError):
+        with pytest.raises(ContractBreakingChangeError):
             results = run_dbt(["run", "--models", "state:modified.contract", "--state", "./state"])
 
 
@@ -320,6 +320,11 @@ def test_changed_contract(self, project):
 select 1 as id
 """
 
+modified_my_model_non_breaking_sql = """
+-- a comment
+select 1 as id, 'blue' as color
+"""
+
 my_model_yml = """
 models:
   - name: my_model
@@ -339,7 +344,20 @@ def test_changed_contract(self, project):
         enforced: true
     columns:
       - name: id
-        data_type: string
+        data_type: text
+"""
+
+modified_my_model_non_breaking_yml = """
+models:
+  - name: my_model
+    config:
+      contract:
+        enforced: true
+    columns:
+      - name: id
+        data_type: int
+      - name: color
+        data_type: text
 """
 
 
@@ -361,10 +379,21 @@ def test_modified_body_and_contract(self, project):
         assert len(results) == 1
         self.copy_state()
 
-        # Change both body and contract
+        # Change both body and contract in a *breaking* way (= changing data_type of existing column)
         write_file(modified_my_model_yml, "models", "my_model.yml")
         write_file(modified_my_model_sql, "models", "my_model.sql")
 
-        # should raise even without specifying state:modified.contract
-        with pytest.raises(ModelContractError):
-            results = run_dbt(["run", "--models", "state:modified", "--state", "./state"])
+        # Should raise even without specifying state:modified.contract
+        with pytest.raises(ContractBreakingChangeError):
+            results = run_dbt(["run", "-s", "state:modified", "--state", "./state"])
+
+        # Change both body and contract in a *non-breaking* way (= adding a new column)
+        write_file(modified_my_model_non_breaking_yml, "models", "my_model.yml")
+        write_file(modified_my_model_non_breaking_sql, "models", "my_model.sql")
+
+        # Should pass
+        run_dbt(["run", "-s", "state:modified", "--state", "./state"])
+
+        # The model's contract has changed, even if non-breaking, so it should be selected by 'state:modified.contract'
+        results = run_dbt(["list", "-s", "state:modified.contract", "--state", "./state"])
+        assert results == ["test.my_model"]