From 33f475dc6b489876590e3e0d987f46286f038715 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Thu, 1 Feb 2024 15:40:48 -0500 Subject: [PATCH 01/20] Move ParsedNodeMandatory; need to fix up a few things from core --- core/dbt/artifacts/resources/__init__.py | 12 +- core/dbt/artifacts/resources/base.py | 41 +++++ core/dbt/artifacts/resources/v1/components.py | 42 +++++ core/dbt/artifacts/resources/v1/config.py | 144 ++++++++++++++++++ core/dbt/artifacts/resources/v1/docs.py | 10 -- core/dbt/artifacts/resources/v1/macro.py | 2 +- core/dbt/contracts/files.py | 42 +---- core/dbt/contracts/graph/nodes.py | 49 ++---- core/dbt/parser/manifest.py | 4 +- 9 files changed, 252 insertions(+), 94 deletions(-) create mode 100644 core/dbt/artifacts/resources/v1/config.py delete mode 100644 core/dbt/artifacts/resources/v1/docs.py diff --git a/core/dbt/artifacts/resources/__init__.py b/core/dbt/artifacts/resources/__init__.py index fba7c9730c4..aff8267f3fb 100644 --- a/core/dbt/artifacts/resources/__init__.py +++ b/core/dbt/artifacts/resources/__init__.py @@ -1,7 +1,14 @@ -from dbt.artifacts.resources.base import BaseResource, GraphResource +from dbt.artifacts.resources.base import BaseResource, GraphResource, FileHash # alias to latest resource definitions -from dbt.artifacts.resources.v1.components import DependsOn, NodeVersion, RefArgs +from dbt.artifacts.resources.v1.components import ( + DependsOn, + NodeVersion, + RefArgs, + HasRelationMetadata, + ParsedNodeMandatory, + Docs, +) from dbt.artifacts.resources.v1.documentation import Documentation from dbt.artifacts.resources.v1.exposure import ( Exposure, @@ -10,7 +17,6 @@ MaturityType, ) from dbt.artifacts.resources.v1.macro import Macro, MacroDependsOn, MacroArgument -from dbt.artifacts.resources.v1.docs import Docs from dbt.artifacts.resources.v1.group import Group from dbt.artifacts.resources.v1.metric import ( ConstantPropertyInput, diff --git a/core/dbt/artifacts/resources/base.py b/core/dbt/artifacts/resources/base.py 
index 0c29c1d1613..b1e699f6e65 100644 --- a/core/dbt/artifacts/resources/base.py +++ b/core/dbt/artifacts/resources/base.py @@ -2,6 +2,7 @@ from dbt_common.dataclass_schema import dbtClassMixin from dbt_common.contracts.util import Replaceable from typing import List +import hashlib from dbt.artifacts.resources.types import NodeType @@ -19,3 +20,43 @@ class BaseResource(dbtClassMixin, Replaceable): @dataclass class GraphResource(BaseResource): fqn: List[str] + + +@dataclass +class FileHash(dbtClassMixin): + name: str # the hash type name + checksum: str # the hashlib.hash_type().hexdigest() of the file contents + + @classmethod + def empty(cls): + return FileHash(name="none", checksum="") + + @classmethod + def path(cls, path: str): + return FileHash(name="path", checksum=path) + + def __eq__(self, other): + if not isinstance(other, FileHash): + return NotImplemented + + if self.name == "none" or self.name != other.name: + return False + + return self.checksum == other.checksum + + def compare(self, contents: str) -> bool: + """Compare the file contents with the given hash""" + if self.name == "none": + return False + + return self.from_contents(contents, name=self.name) == self.checksum + + @classmethod + def from_contents(cls, contents: str, name="sha256") -> "FileHash": + """Create a file hash from the given file contents. The hash is always + the utf-8 encoding of the contents given, because dbt only reads files + as utf-8. 
+ """ + data = contents.encode("utf-8") + checksum = hashlib.new(name, data).hexdigest() + return cls(name=name, checksum=checksum) diff --git a/core/dbt/artifacts/resources/v1/components.py b/core/dbt/artifacts/resources/v1/components.py index 9f3e0c8a967..b690a0e6066 100644 --- a/core/dbt/artifacts/resources/v1/components.py +++ b/core/dbt/artifacts/resources/v1/components.py @@ -1,5 +1,7 @@ from dataclasses import dataclass, field from dbt.artifacts.resources.v1.macro import MacroDependsOn +from dbt.artifacts.resources.base import GraphResource, FileHash +from dbt.artifacts.resources.v1.config import NodeConfig from dbt_common.dataclass_schema import dbtClassMixin from typing import Dict, List, Optional, Union @@ -35,3 +37,43 @@ def keyword_args(self) -> Dict[str, Optional[NodeVersion]]: return {"version": self.version} else: return {} + + +@dataclass +class Docs(dbtClassMixin): + show: bool = True + node_color: Optional[str] = None + + +@dataclass +class HasRelationMetadata(dbtClassMixin): + database: Optional[str] + schema: str + + # Can't set database to None like it ought to be + # because it messes up the subclasses and default parameters + # so hack it here + @classmethod + def __pre_deserialize__(cls, data): + data = super().__pre_deserialize__(data) + if "database" not in data: + data["database"] = None + return data + + @property + def quoting_dict(self) -> Dict[str, bool]: + if hasattr(self, "quoting"): + return self.quoting.to_dict(omit_none=True) + else: + return {} + + +@dataclass +class ParsedNodeMandatory(GraphResource, HasRelationMetadata): + alias: str + checksum: FileHash + config: NodeConfig = field(default_factory=NodeConfig) + + @property + def identifier(self): + return self.alias diff --git a/core/dbt/artifacts/resources/v1/config.py b/core/dbt/artifacts/resources/v1/config.py new file mode 100644 index 00000000000..b4c075abf11 --- /dev/null +++ b/core/dbt/artifacts/resources/v1/config.py @@ -0,0 +1,144 @@ +from dbt_common.dataclass_schema 
import dbtClassMixin, ValidationError +from typing import Optional, List, Any, Dict, Union +from dataclasses import dataclass, field +from dbt_common.contracts.config.base import ( + BaseConfig, + CompareBehavior, + MergeBehavior, +) +from dbt_common.contracts.config.metadata import Metadata, ShowBehavior +from dbt_common.contracts.config.materialization import OnConfigurationChangeOption +from dbt.contracts.util import list_str +from dbt.artifacts.resources.v1.components import Docs +from dbt.contracts.graph.utils import validate_color +from dbt import hooks + + +def metas(*metas: Metadata) -> Dict[str, Any]: + existing: Dict[str, Any] = {} + for m in metas: + existing = m.meta(existing) + return existing + + +@dataclass +class ContractConfig(dbtClassMixin): + enforced: bool = False + alias_types: bool = True + + +@dataclass +class Hook(dbtClassMixin): + sql: str + transaction: bool = True + index: Optional[int] = None + + +@dataclass +class NodeAndTestConfig(BaseConfig): + enabled: bool = True + # these fields are included in serialized output, but are not part of + # config comparison (they are part of database_representation) + alias: Optional[str] = field( + default=None, + metadata=CompareBehavior.Exclude.meta(), + ) + schema: Optional[str] = field( + default=None, + metadata=CompareBehavior.Exclude.meta(), + ) + database: Optional[str] = field( + default=None, + metadata=CompareBehavior.Exclude.meta(), + ) + tags: Union[List[str], str] = field( + default_factory=list_str, + metadata=metas(ShowBehavior.Hide, MergeBehavior.Append, CompareBehavior.Exclude), + ) + meta: Dict[str, Any] = field( + default_factory=dict, + metadata=MergeBehavior.Update.meta(), + ) + group: Optional[str] = field( + default=None, + metadata=CompareBehavior.Exclude.meta(), + ) + + +@dataclass +class NodeConfig(NodeAndTestConfig): + # Note: if any new fields are added with MergeBehavior, also update the + # 'mergebehavior' dictionary + materialized: str = "view" + incremental_strategy: 
Optional[str] = None + persist_docs: Dict[str, Any] = field(default_factory=dict) + post_hook: List[Hook] = field( + default_factory=list, + metadata={"merge": MergeBehavior.Append, "alias": "post-hook"}, + ) + pre_hook: List[Hook] = field( + default_factory=list, + metadata={"merge": MergeBehavior.Append, "alias": "pre-hook"}, + ) + quoting: Dict[str, Any] = field( + default_factory=dict, + metadata=MergeBehavior.Update.meta(), + ) + # This is actually only used by seeds. Should it be available to others? + # That would be a breaking change! + column_types: Dict[str, Any] = field( + default_factory=dict, + metadata=MergeBehavior.Update.meta(), + ) + full_refresh: Optional[bool] = None + # 'unique_key' doesn't use 'Optional' because typing.get_type_hints was + # sometimes getting the Union order wrong, causing serialization failures. + unique_key: Union[str, List[str], None] = None + on_schema_change: Optional[str] = "ignore" + on_configuration_change: OnConfigurationChangeOption = field( + default_factory=OnConfigurationChangeOption.default + ) + grants: Dict[str, Any] = field( + default_factory=dict, metadata=MergeBehavior.DictKeyAppend.meta() + ) + packages: List[str] = field( + default_factory=list, + metadata=MergeBehavior.Append.meta(), + ) + docs: Docs = field( + default_factory=Docs, + metadata=MergeBehavior.Update.meta(), + ) + contract: ContractConfig = field( + default_factory=ContractConfig, + metadata=MergeBehavior.Update.meta(), + ) + + def __post_init__(self): + # we validate that node_color has a suitable value to prevent dbt-docs from crashing + if self.docs.node_color: + node_color = self.docs.node_color + if not validate_color(node_color): + raise ValidationError( + f"Invalid color name for docs.node_color: {node_color}. " + "It is neither a valid HTML color name nor a valid HEX code." 
+ ) + + if ( + self.contract.enforced + and self.materialized == "incremental" + and self.on_schema_change not in ("append_new_columns", "fail") + ): + raise ValidationError( + f"Invalid value for on_schema_change: {self.on_schema_change}. Models " + "materialized as incremental with contracts enabled must set " + "on_schema_change to 'append_new_columns' or 'fail'" + ) + + @classmethod + def __pre_deserialize__(cls, data): + data = super().__pre_deserialize__(data) + for key in hooks.ModelHookType: + if key in data: + data[key] = [hooks.get_hook_dict(h) for h in data[key]] + return data diff --git a/core/dbt/artifacts/resources/v1/docs.py b/core/dbt/artifacts/resources/v1/docs.py deleted file mode 100644 index 5dca7a88421..00000000000 --- a/core/dbt/artifacts/resources/v1/docs.py +++ /dev/null @@ -1,10 +0,0 @@ -from dataclasses import dataclass -from dbt_common.dataclass_schema import dbtClassMixin -from dbt_common.contracts.util import Replaceable -from typing import Optional - - -@dataclass -class Docs(dbtClassMixin, Replaceable): - show: bool = True - node_color: Optional[str] = None diff --git a/core/dbt/artifacts/resources/v1/macro.py b/core/dbt/artifacts/resources/v1/macro.py index cda0fd2ac32..bf1fed8fef7 100644 --- a/core/dbt/artifacts/resources/v1/macro.py +++ b/core/dbt/artifacts/resources/v1/macro.py @@ -6,7 +6,7 @@ from dbt_common.dataclass_schema import dbtClassMixin from dbt.artifacts.resources.base import BaseResource from dbt.artifacts.resources.types import NodeType, ModelLanguage -from dbt.artifacts.resources.v1.docs import Docs +from dbt.artifacts.resources.v1.components import Docs @dataclass diff --git a/core/dbt/contracts/files.py b/core/dbt/contracts/files.py index fe5f91d265e..714782161cc 100644 --- a/core/dbt/contracts/files.py +++ b/core/dbt/contracts/files.py @@ -1,4 +1,3 @@ -import hashlib import os from dataclasses import dataclass, field @@ -7,6 +6,7 @@ from dbt.constants import MAXIMUM_SEED_SIZE from dbt_common.dataclass_schema 
import dbtClassMixin, StrEnum +from dbt.artifacts.resources.base import FileHash from .util import SourceKey @@ -70,46 +70,6 @@ def seed_too_large(self) -> bool: return os.stat(self.full_path).st_size > MAXIMUM_SEED_SIZE -@dataclass -class FileHash(dbtClassMixin): - name: str # the hash type name - checksum: str # the hashlib.hash_type().hexdigest() of the file contents - - @classmethod - def empty(cls): - return FileHash(name="none", checksum="") - - @classmethod - def path(cls, path: str): - return FileHash(name="path", checksum=path) - - def __eq__(self, other): - if not isinstance(other, FileHash): - return NotImplemented - - if self.name == "none" or self.name != other.name: - return False - - return self.checksum == other.checksum - - def compare(self, contents: str) -> bool: - """Compare the file contents with the given hash""" - if self.name == "none": - return False - - return self.from_contents(contents, name=self.name) == self.checksum - - @classmethod - def from_contents(cls, contents: str, name="sha256") -> "FileHash": - """Create a file hash from the given file contents. The hash is always - the utf-8 encoding of the contents given, because dbt only reads files - as utf-8. 
- """ - data = contents.encode("utf-8") - checksum = hashlib.new(name, data).hexdigest() - return cls(name=name, checksum=checksum) - - @dataclass class RemoteFile(dbtClassMixin): def __init__(self, language) -> None: diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index 3561ba146da..63215ce72fc 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -29,7 +29,6 @@ from dbt_common.dataclass_schema import dbtClassMixin, ExtensibleDbtClassMixin from dbt_common.clients.system import write_file -from dbt.contracts.files import FileHash from dbt.contracts.graph.unparsed import ( ExternalTable, FreshnessThreshold, @@ -93,6 +92,9 @@ RefArgs as RefArgsResource, SavedQuery as SavedQueryResource, SemanticModel as SemanticModelResource, + ParsedNodeMandatory as ParsedNodeMandatoryResource, + HasRelationMetadata, + FileHash, ) # ===================================================================== @@ -196,29 +198,6 @@ class Contract(dbtClassMixin, Replaceable): # Metrics, exposures, -@dataclass -class HasRelationMetadata(dbtClassMixin, Replaceable): - database: Optional[str] - schema: str - - # Can't set database to None like it ought to be - # because it messes up the subclasses and default parameters - # so hack it here - @classmethod - def __pre_deserialize__(cls, data): - data = super().__pre_deserialize__(data) - if "database" not in data: - data["database"] = None - return data - - @property - def quoting_dict(self) -> Dict[str, bool]: - if hasattr(self, "quoting"): - return self.quoting.to_dict(omit_none=True) - else: - return {} - - @dataclass class DeferRelation(HasRelationMetadata): alias: str @@ -230,14 +209,10 @@ def identifier(self): @dataclass -class ParsedNodeMandatory(GraphNode[GraphResource], HasRelationMetadata, Replaceable): - alias: str - checksum: FileHash - config: NodeConfig = field(default_factory=NodeConfig) - - @property - def identifier(self): - return self.alias +class 
ParsedNodeMandatory( + ParsedNodeMandatoryResource, GraphNode[GraphResource], HasRelationMetadata, Replaceable +): + pass # This needs to be in all ManifestNodes and also in SourceDefinition, @@ -516,7 +491,7 @@ class HookNode(CompiledNode): class ModelNode(CompiledNode): resource_type: Literal[NodeType.Model] access: AccessType = AccessType.Protected - config: ModelConfig = field(default_factory=ModelConfig) + config: ModelConfig = field(default_factory=ModelConfig) # type: ignore[assignment] constraints: List[ModelLevelConstraint] = field(default_factory=list) version: Optional[NodeVersion] = None latest_version: Optional[NodeVersion] = None @@ -818,7 +793,7 @@ class SqlNode(CompiledNode): @dataclass class SeedNode(ParsedNode): # No SQLDefaults! resource_type: Literal[NodeType.Seed] - config: SeedConfig = field(default_factory=SeedConfig) + config: SeedConfig = field(default_factory=SeedConfig) # type: ignore[assignment] # seeds need the root_path because the contents are not loaded initially # and we need the root_path to load the seed later root_path: Optional[str] = None @@ -1014,7 +989,7 @@ class UnitTestNode(CompiledNode): tested_node_unique_id: Optional[str] = None this_input_node_unique_id: Optional[str] = None overrides: Optional[UnitTestOverrides] = None - config: UnitTestNodeConfig = field(default_factory=UnitTestNodeConfig) + config: UnitTestNodeConfig = field(default_factory=UnitTestNodeConfig) # type: ignore[assignment] @dataclass @@ -1094,13 +1069,13 @@ class IntermediateSnapshotNode(CompiledNode): # into a full ParsedSnapshotNode after rendering. Note: it currently does # not work to set snapshot config in schema files because of the validation. 
resource_type: Literal[NodeType.Snapshot] - config: EmptySnapshotConfig = field(default_factory=EmptySnapshotConfig) + config: EmptySnapshotConfig = field(default_factory=EmptySnapshotConfig) # type: ignore[assignment] @dataclass class SnapshotNode(CompiledNode): resource_type: Literal[NodeType.Snapshot] - config: SnapshotConfig + config: SnapshotConfig # type: ignore[assignment] defer_relation: Optional[DeferRelation] = None diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index 66cccfbcac0..eb6ff5b5702 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -79,7 +79,7 @@ from dbt.context.macro_resolver import MacroResolver, TestMacroNamespace from dbt.context.configured import generate_macro_context from dbt.context.providers import ParseProvider, generate_runtime_macro_context -from dbt.contracts.files import FileHash, ParseFileType, SchemaSourceFile +from dbt.contracts.files import ParseFileType, SchemaSourceFile from dbt.parser.read_files import ( ReadFilesFromFileSystem, load_source_file, @@ -107,7 +107,7 @@ ResultNode, ModelNode, ) -from dbt.artifacts.resources import NodeRelation, NodeVersion +from dbt.artifacts.resources import NodeRelation, NodeVersion, FileHash from dbt.artifacts.schemas.base import Writable from dbt.exceptions import ( TargetNotFoundError, From 6d5aaeba6949a06db9abe45daf549d55db37c258 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Thu, 1 Feb 2024 16:10:37 -0500 Subject: [PATCH 02/20] cleanup, remove NodeAndTestConfig and NodeConfig from model_config.py --- core/dbt/artifacts/resources/__init__.py | 5 + core/dbt/artifacts/resources/types.py | 5 + core/dbt/artifacts/resources/v1/config.py | 8 +- core/dbt/contracts/graph/model_config.py | 121 +--------------------- core/dbt/contracts/graph/nodes.py | 2 +- 5 files changed, 19 insertions(+), 122 deletions(-) diff --git a/core/dbt/artifacts/resources/__init__.py b/core/dbt/artifacts/resources/__init__.py index aff8267f3fb..6d2eccc95c2 100644 --- 
a/core/dbt/artifacts/resources/__init__.py +++ b/core/dbt/artifacts/resources/__init__.py @@ -56,3 +56,8 @@ SemanticModel, SemanticModelConfig, ) + +from dbt.artifacts.resources.v1.config import ( + NodeAndTestConfig, + NodeConfig, +) diff --git a/core/dbt/artifacts/resources/types.py b/core/dbt/artifacts/resources/types.py index f86ca8a26af..ba5a866e1cf 100644 --- a/core/dbt/artifacts/resources/types.py +++ b/core/dbt/artifacts/resources/types.py @@ -54,3 +54,8 @@ class RunHookType(StrEnum): class ModelLanguage(StrEnum): python = "python" sql = "sql" + + +class ModelHookType(StrEnum): + PreHook = "pre-hook" + PostHook = "post-hook" diff --git a/core/dbt/artifacts/resources/v1/config.py b/core/dbt/artifacts/resources/v1/config.py index b4c075abf11..2e3829f338a 100644 --- a/core/dbt/artifacts/resources/v1/config.py +++ b/core/dbt/artifacts/resources/v1/config.py @@ -8,12 +8,16 @@ ) from dbt_common.contracts.config.metadata import Metadata, ShowBehavior from dbt_common.contracts.config.materialization import OnConfigurationChangeOption -from dbt.contracts.util import list_str from dbt.artifacts.resources.v1.components import Docs +from dbt.artifacts.resources.types import ModelHookType from dbt.contracts.graph.utils import validate_color from dbt import hooks +def list_str() -> List[str]: + return [] + + def metas(*metas: Metadata) -> Dict[str, Any]: existing: Dict[str, Any] = {} for m in metas: @@ -138,7 +142,7 @@ def __post_init__(self): @classmethod def __pre_deserialize__(cls, data): data = super().__pre_deserialize__(data) - for key in hooks.ModelHookType: + for key in ModelHookType: if key in data: data[key] = [hooks.get_hook_dict(h) for h in data[key]] return data diff --git a/core/dbt/contracts/graph/model_config.py b/core/dbt/contracts/graph/model_config.py index b266c52c395..5e081c92fd4 100644 --- a/core/dbt/contracts/graph/model_config.py +++ b/core/dbt/contracts/graph/model_config.py @@ -7,18 +7,16 @@ MetricConfig, SavedQueryConfig, SemanticModelConfig, + 
NodeAndTestConfig, + NodeConfig, ) from dbt_common.contracts.config.base import BaseConfig, MergeBehavior, CompareBehavior -from dbt_common.contracts.config.materialization import OnConfigurationChangeOption from dbt_common.contracts.config.metadata import Metadata, ShowBehavior from dbt_common.dataclass_schema import ( dbtClassMixin, ValidationError, ) -from dbt.contracts.graph.unparsed import Docs -from dbt.contracts.graph.utils import validate_color from dbt.contracts.util import Replaceable, list_str -from dbt import hooks from dbt.node_types import NodeType, AccessType from mashumaro.jsonschema.annotations import Pattern @@ -59,121 +57,6 @@ class SourceConfig(BaseConfig): enabled: bool = True -@dataclass -class NodeAndTestConfig(BaseConfig): - enabled: bool = True - # these fields are included in serialized output, but are not part of - # config comparison (they are part of database_representation) - alias: Optional[str] = field( - default=None, - metadata=CompareBehavior.Exclude.meta(), - ) - schema: Optional[str] = field( - default=None, - metadata=CompareBehavior.Exclude.meta(), - ) - database: Optional[str] = field( - default=None, - metadata=CompareBehavior.Exclude.meta(), - ) - tags: Union[List[str], str] = field( - default_factory=list_str, - metadata=metas(ShowBehavior.Hide, MergeBehavior.Append, CompareBehavior.Exclude), - ) - meta: Dict[str, Any] = field( - default_factory=dict, - metadata=MergeBehavior.Update.meta(), - ) - group: Optional[str] = field( - default=None, - metadata=CompareBehavior.Exclude.meta(), - ) - - -@dataclass -class NodeConfig(NodeAndTestConfig): - # Note: if any new fields are added with MergeBehavior, also update the - # 'mergebehavior' dictionary - materialized: str = "view" - incremental_strategy: Optional[str] = None - persist_docs: Dict[str, Any] = field(default_factory=dict) - post_hook: List[Hook] = field( - default_factory=list, - metadata={"merge": MergeBehavior.Append, "alias": "post-hook"}, - ) - pre_hook: List[Hook] 
= field( - default_factory=list, - metadata={"merge": MergeBehavior.Append, "alias": "pre-hook"}, - ) - quoting: Dict[str, Any] = field( - default_factory=dict, - metadata=MergeBehavior.Update.meta(), - ) - # This is actually only used by seeds. Should it be available to others? - # That would be a breaking change! - column_types: Dict[str, Any] = field( - default_factory=dict, - metadata=MergeBehavior.Update.meta(), - ) - full_refresh: Optional[bool] = None - # 'unique_key' doesn't use 'Optional' because typing.get_type_hints was - # sometimes getting the Union order wrong, causing serialization failures. - unique_key: Union[str, List[str], None] = None - on_schema_change: Optional[str] = "ignore" - on_configuration_change: OnConfigurationChangeOption = field( - default_factory=OnConfigurationChangeOption.default - ) - grants: Dict[str, Any] = field( - default_factory=dict, metadata=MergeBehavior.DictKeyAppend.meta() - ) - packages: List[str] = field( - default_factory=list, - metadata=MergeBehavior.Append.meta(), - ) - docs: Docs = field( - default_factory=Docs, - metadata=MergeBehavior.Update.meta(), - ) - contract: ContractConfig = field( - default_factory=ContractConfig, - metadata=MergeBehavior.Update.meta(), - ) - - def __post_init__(self): - # we validate that node_color has a suitable value to prevent dbt-docs from crashing - if self.docs.node_color: - node_color = self.docs.node_color - if not validate_color(node_color): - raise ValidationError( - f"Invalid color name for docs.node_color: {node_color}. " - "It is neither a valid HTML color name nor a valid HEX code." - ) - - if ( - self.contract.enforced - and self.materialized == "incremental" - and self.on_schema_change not in ("append_new_columns", "fail") - ): - raise ValidationError( - f"Invalid value for on_schema_change: {self.on_schema_change}. 
Models " - "materialized as incremental with contracts enabled must set " - "on_schema_change to 'append_new_columns' or 'fail'" - ) - - @classmethod - def __pre_deserialize__(cls, data): - data = super().__pre_deserialize__(data) - for key in hooks.ModelHookType: - if key in data: - data[key] = [hooks.get_hook_dict(h) for h in data[key]] - return data - - # this is still used by jsonschema validation - @classmethod - def field_mapping(cls): - return {"post_hook": "post-hook", "pre_hook": "pre-hook"} - - @dataclass class ModelConfig(NodeConfig): access: AccessType = field( diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index 63215ce72fc..fd65e9ab3b5 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -65,7 +65,6 @@ ) from .model_config import ( - NodeConfig, ModelConfig, SeedConfig, TestConfig, @@ -95,6 +94,7 @@ ParsedNodeMandatory as ParsedNodeMandatoryResource, HasRelationMetadata, FileHash, + NodeConfig, ) # ===================================================================== From 73b4f9b03f8830d36963b703fc950b094439ad59 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Thu, 1 Feb 2024 16:13:05 -0500 Subject: [PATCH 03/20] Remove some unnecessary type: ignores --- core/dbt/contracts/graph/nodes.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index fd65e9ab3b5..9314790d6db 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -491,7 +491,7 @@ class HookNode(CompiledNode): class ModelNode(CompiledNode): resource_type: Literal[NodeType.Model] access: AccessType = AccessType.Protected - config: ModelConfig = field(default_factory=ModelConfig) # type: ignore[assignment] + config: ModelConfig = field(default_factory=ModelConfig) constraints: List[ModelLevelConstraint] = field(default_factory=list) version: Optional[NodeVersion] = None latest_version: 
Optional[NodeVersion] = None @@ -793,7 +793,7 @@ class SqlNode(CompiledNode): @dataclass class SeedNode(ParsedNode): # No SQLDefaults! resource_type: Literal[NodeType.Seed] - config: SeedConfig = field(default_factory=SeedConfig) # type: ignore[assignment] + config: SeedConfig = field(default_factory=SeedConfig) # seeds need the root_path because the contents are not loaded initially # and we need the root_path to load the seed later root_path: Optional[str] = None @@ -989,7 +989,7 @@ class UnitTestNode(CompiledNode): tested_node_unique_id: Optional[str] = None this_input_node_unique_id: Optional[str] = None overrides: Optional[UnitTestOverrides] = None - config: UnitTestNodeConfig = field(default_factory=UnitTestNodeConfig) # type: ignore[assignment] + config: UnitTestNodeConfig = field(default_factory=UnitTestNodeConfig) @dataclass @@ -1069,13 +1069,13 @@ class IntermediateSnapshotNode(CompiledNode): # into a full ParsedSnapshotNode after rendering. Note: it currently does # not work to set snapshot config in schema files because of the validation. 
resource_type: Literal[NodeType.Snapshot] - config: EmptySnapshotConfig = field(default_factory=EmptySnapshotConfig) # type: ignore[assignment] + config: EmptySnapshotConfig = field(default_factory=EmptySnapshotConfig) @dataclass class SnapshotNode(CompiledNode): resource_type: Literal[NodeType.Snapshot] - config: SnapshotConfig # type: ignore[assignment] + config: SnapshotConfig defer_relation: Optional[DeferRelation] = None From 755e41c448b8c6346759176e2cd526f7bac5e89c Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Thu, 1 Feb 2024 16:19:34 -0500 Subject: [PATCH 04/20] Move MacroDependsOn and Docs (circular reference) --- core/dbt/artifacts/resources/__init__.py | 3 +-- core/dbt/artifacts/resources/base.py | 8 +++++++- core/dbt/artifacts/resources/v1/components.py | 17 ++++++++++------- core/dbt/artifacts/resources/v1/config.py | 2 +- core/dbt/artifacts/resources/v1/macro.py | 15 ++------------- 5 files changed, 21 insertions(+), 24 deletions(-) diff --git a/core/dbt/artifacts/resources/__init__.py b/core/dbt/artifacts/resources/__init__.py index 6d2eccc95c2..9ba635004b7 100644 --- a/core/dbt/artifacts/resources/__init__.py +++ b/core/dbt/artifacts/resources/__init__.py @@ -1,4 +1,4 @@ -from dbt.artifacts.resources.base import BaseResource, GraphResource, FileHash +from dbt.artifacts.resources.base import BaseResource, GraphResource, FileHash, Docs # alias to latest resource definitions from dbt.artifacts.resources.v1.components import ( @@ -7,7 +7,6 @@ RefArgs, HasRelationMetadata, ParsedNodeMandatory, - Docs, ) from dbt.artifacts.resources.v1.documentation import Documentation from dbt.artifacts.resources.v1.exposure import ( diff --git a/core/dbt/artifacts/resources/base.py b/core/dbt/artifacts/resources/base.py index b1e699f6e65..93eca86a814 100644 --- a/core/dbt/artifacts/resources/base.py +++ b/core/dbt/artifacts/resources/base.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from dbt_common.dataclass_schema import dbtClassMixin from 
dbt_common.contracts.util import Replaceable -from typing import List +from typing import List, Optional import hashlib from dbt.artifacts.resources.types import NodeType @@ -60,3 +60,9 @@ def from_contents(cls, contents: str, name="sha256") -> "FileHash": data = contents.encode("utf-8") checksum = hashlib.new(name, data).hexdigest() return cls(name=name, checksum=checksum) + + +@dataclass +class Docs(dbtClassMixin): + show: bool = True + node_color: Optional[str] = None diff --git a/core/dbt/artifacts/resources/v1/components.py b/core/dbt/artifacts/resources/v1/components.py index b690a0e6066..1723026d333 100644 --- a/core/dbt/artifacts/resources/v1/components.py +++ b/core/dbt/artifacts/resources/v1/components.py @@ -1,5 +1,4 @@ from dataclasses import dataclass, field -from dbt.artifacts.resources.v1.macro import MacroDependsOn from dbt.artifacts.resources.base import GraphResource, FileHash from dbt.artifacts.resources.v1.config import NodeConfig from dbt_common.dataclass_schema import dbtClassMixin @@ -9,6 +8,16 @@ NodeVersion = Union[str, float] +@dataclass +class MacroDependsOn(dbtClassMixin): + macros: List[str] = field(default_factory=list) + + # 'in' on lists is O(n) so this is O(n^2) for # of macros + def add_macro(self, value: str): + if value not in self.macros: + self.macros.append(value) + + @dataclass class DependsOn(MacroDependsOn): nodes: List[str] = field(default_factory=list) @@ -39,12 +48,6 @@ def keyword_args(self) -> Dict[str, Optional[NodeVersion]]: return {} -@dataclass -class Docs(dbtClassMixin): - show: bool = True - node_color: Optional[str] = None - - @dataclass class HasRelationMetadata(dbtClassMixin): database: Optional[str] diff --git a/core/dbt/artifacts/resources/v1/config.py b/core/dbt/artifacts/resources/v1/config.py index 2e3829f338a..4323465df56 100644 --- a/core/dbt/artifacts/resources/v1/config.py +++ b/core/dbt/artifacts/resources/v1/config.py @@ -8,7 +8,7 @@ ) from dbt_common.contracts.config.metadata import Metadata, 
ShowBehavior from dbt_common.contracts.config.materialization import OnConfigurationChangeOption -from dbt.artifacts.resources.v1.components import Docs +from dbt.artifacts.resources.base import Docs from dbt.artifacts.resources.types import ModelHookType from dbt.contracts.graph.utils import validate_color from dbt import hooks diff --git a/core/dbt/artifacts/resources/v1/macro.py b/core/dbt/artifacts/resources/v1/macro.py index bf1fed8fef7..be02d529ee1 100644 --- a/core/dbt/artifacts/resources/v1/macro.py +++ b/core/dbt/artifacts/resources/v1/macro.py @@ -2,11 +2,10 @@ import time from typing import Literal, List, Dict, Optional, Any -from dbt_common.contracts.util import Replaceable from dbt_common.dataclass_schema import dbtClassMixin -from dbt.artifacts.resources.base import BaseResource +from dbt.artifacts.resources.base import BaseResource, Docs from dbt.artifacts.resources.types import NodeType, ModelLanguage -from dbt.artifacts.resources.v1.components import Docs +from dbt.artifacts.resources.v1.components import MacroDependsOn @dataclass @@ -16,16 +15,6 @@ class MacroArgument(dbtClassMixin): description: str = "" -@dataclass -class MacroDependsOn(dbtClassMixin, Replaceable): - macros: List[str] = field(default_factory=list) - - # 'in' on lists is O(n) so this is O(n^2) for # of macros - def add_macro(self, value: str): - if value not in self.macros: - self.macros.append(value) - - @dataclass class Macro(BaseResource): macro_sql: str From 6313fa253927874235d8dbdc65ecf65dad072147 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 6 Feb 2024 16:05:41 -0500 Subject: [PATCH 05/20] ParsedNode, initial attempt --- core/dbt/artifacts/resources/__init__.py | 2 + core/dbt/artifacts/resources/v1/components.py | 44 +++++++++++++++++-- core/dbt/contracts/graph/nodes.py | 40 +++-------------- 3 files changed, 49 insertions(+), 37 deletions(-) diff --git a/core/dbt/artifacts/resources/__init__.py b/core/dbt/artifacts/resources/__init__.py index 
9ba635004b7..baddccfdca4 100644 --- a/core/dbt/artifacts/resources/__init__.py +++ b/core/dbt/artifacts/resources/__init__.py @@ -7,6 +7,8 @@ RefArgs, HasRelationMetadata, ParsedNodeMandatory, + ParsedNode, + ColumnInfo, ) from dbt.artifacts.resources.v1.documentation import Documentation from dbt.artifacts.resources.v1.exposure import ( diff --git a/core/dbt/artifacts/resources/v1/components.py b/core/dbt/artifacts/resources/v1/components.py index 1723026d333..cf9730a1630 100644 --- a/core/dbt/artifacts/resources/v1/components.py +++ b/core/dbt/artifacts/resources/v1/components.py @@ -1,8 +1,14 @@ +import time from dataclasses import dataclass, field -from dbt.artifacts.resources.base import GraphResource, FileHash +from dbt.artifacts.resources.base import GraphResource, FileHash, Docs from dbt.artifacts.resources.v1.config import NodeConfig -from dbt_common.dataclass_schema import dbtClassMixin -from typing import Dict, List, Optional, Union +from dbt_common.dataclass_schema import ( + dbtClassMixin, + ExtensibleDbtClassMixin, +) +from dbt_common.contracts.config.properties import AdditionalPropertiesMixin +from dbt_common.contracts.constraints import ColumnLevelConstraint +from typing import Dict, List, Optional, Union, Any NodeVersion = Union[str, float] @@ -48,6 +54,20 @@ def keyword_args(self) -> Dict[str, Optional[NodeVersion]]: return {} +@dataclass +class ColumnInfo(AdditionalPropertiesMixin, ExtensibleDbtClassMixin): + """Used in all ManifestNodes and SourceDefinition""" + + name: str + description: str = "" + meta: Dict[str, Any] = field(default_factory=dict) + data_type: Optional[str] = None + constraints: List[ColumnLevelConstraint] = field(default_factory=list) + quote: Optional[bool] = None + tags: List[str] = field(default_factory=list) + _extra: Dict[str, Any] = field(default_factory=dict) + + @dataclass class HasRelationMetadata(dbtClassMixin): database: Optional[str] @@ -80,3 +100,21 @@ class ParsedNodeMandatory(GraphResource, 
HasRelationMetadata): @property def identifier(self): return self.alias + + +@dataclass +class ParsedNode(ParsedNodeMandatory): + tags: List[str] = field(default_factory=list) + description: str = field(default="") + columns: Dict[str, ColumnInfo] = field(default_factory=dict) + meta: Dict[str, Any] = field(default_factory=dict) + group: Optional[str] = None + docs: Docs = field(default_factory=Docs) + patch_path: Optional[str] = None + build_path: Optional[str] = None + deferred: bool = False + unrendered_config: Dict[str, Any] = field(default_factory=dict) + created_at: float = field(default_factory=lambda: time.time()) + config_call_dict: Dict[str, Any] = field(default_factory=dict) + relation_name: Optional[str] = None + raw_code: str = "" diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index b1cef29a2d0..60b0da12889 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -22,11 +22,10 @@ from dbt import deprecations from dbt_common.contracts.constraints import ( - ColumnLevelConstraint, ConstraintType, ModelLevelConstraint, ) -from dbt_common.dataclass_schema import dbtClassMixin, ExtensibleDbtClassMixin +from dbt_common.dataclass_schema import dbtClassMixin from dbt_common.clients.system import write_file from dbt.contracts.graph.unparsed import ( @@ -45,7 +44,6 @@ ) from dbt.contracts.graph.node_args import ModelNodeArgs from dbt.contracts.util import Replaceable -from dbt_common.contracts.config.properties import AdditionalPropertiesMixin from dbt_common.events.functions import warn_or_error from dbt.exceptions import ParsingError, ContractBreakingChangeError, ValidationError from dbt.events.types import ( @@ -92,9 +90,11 @@ SavedQuery as SavedQueryResource, SemanticModel as SemanticModelResource, ParsedNodeMandatory as ParsedNodeMandatoryResource, + ParsedNode as ParsedNodeResource, HasRelationMetadata, FileHash, NodeConfig, + ColumnInfo, ) # 
===================================================================== @@ -176,20 +176,6 @@ def same_fqn(self, other) -> bool: return self.fqn == other.fqn -@dataclass -class ColumnInfo(AdditionalPropertiesMixin, ExtensibleDbtClassMixin, Replaceable): - """Used in all ManifestNodes and SourceDefinition""" - - name: str - description: str = "" - meta: Dict[str, Any] = field(default_factory=dict) - data_type: Optional[str] = None - constraints: List[ColumnLevelConstraint] = field(default_factory=list) - quote: Optional[bool] = None - tags: List[str] = field(default_factory=list) - _extra: Dict[str, Any] = field(default_factory=dict) - - @dataclass class Contract(dbtClassMixin, Replaceable): enforced: bool = False @@ -216,7 +202,8 @@ class ParsedNodeMandatory( # This needs to be in all ManifestNodes and also in SourceDefinition, -# because of "source freshness" +# because of "source freshness". Should not be in artifacts, because we +# don't write out _event_status. @dataclass class NodeInfoMixin: _event_status: Dict[str, Any] = field(default_factory=dict) @@ -252,22 +239,7 @@ def clear_event_status(self): @dataclass -class ParsedNode(NodeInfoMixin, ParsedNodeMandatory, SerializableType): - tags: List[str] = field(default_factory=list) - description: str = field(default="") - columns: Dict[str, ColumnInfo] = field(default_factory=dict) - meta: Dict[str, Any] = field(default_factory=dict) - group: Optional[str] = None - docs: Docs = field(default_factory=Docs) - patch_path: Optional[str] = None - build_path: Optional[str] = None - deferred: bool = False - unrendered_config: Dict[str, Any] = field(default_factory=dict) - created_at: float = field(default_factory=lambda: time.time()) - config_call_dict: Dict[str, Any] = field(default_factory=dict) - relation_name: Optional[str] = None - raw_code: str = "" - +class ParsedNode(ParsedNodeResource, NodeInfoMixin, ParsedNodeMandatory, SerializableType): def get_target_write_path(self, target_path: str, subdirectory: str): # 
This is called for both the "compiled" subdirectory of "target" and the "run" subdirectory if os.path.basename(self.path) == os.path.basename(self.original_file_path): From 9c289304a0e6a2bda96ff092e1b4718c6675d552 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 6 Feb 2024 18:02:03 -0500 Subject: [PATCH 06/20] Move CompiledNode --- core/dbt/artifacts/resources/__init__.py | 3 ++ core/dbt/artifacts/resources/v1/components.py | 34 +++++++++++++++++++ core/dbt/contracts/graph/nodes.py | 33 ++---------------- core/dbt/parser/base.py | 3 +- core/dbt/tests/__init__.py | 0 tests/unit/test_contracts_graph_compiled.py | 2 +- 6 files changed, 43 insertions(+), 32 deletions(-) delete mode 100644 core/dbt/tests/__init__.py diff --git a/core/dbt/artifacts/resources/__init__.py b/core/dbt/artifacts/resources/__init__.py index baddccfdca4..764e2444651 100644 --- a/core/dbt/artifacts/resources/__init__.py +++ b/core/dbt/artifacts/resources/__init__.py @@ -9,6 +9,9 @@ ParsedNodeMandatory, ParsedNode, ColumnInfo, + CompiledNode, + InjectedCTE, + Contract, ) from dbt.artifacts.resources.v1.documentation import Documentation from dbt.artifacts.resources.v1.exposure import ( diff --git a/core/dbt/artifacts/resources/v1/components.py b/core/dbt/artifacts/resources/v1/components.py index cf9730a1630..e0a79158ce1 100644 --- a/core/dbt/artifacts/resources/v1/components.py +++ b/core/dbt/artifacts/resources/v1/components.py @@ -68,6 +68,21 @@ class ColumnInfo(AdditionalPropertiesMixin, ExtensibleDbtClassMixin): _extra: Dict[str, Any] = field(default_factory=dict) +@dataclass +class InjectedCTE(dbtClassMixin): + """Used in CompiledNodes as part of ephemeral model processing""" + + id: str + sql: str + + +@dataclass +class Contract(dbtClassMixin): + enforced: bool = False + alias_types: bool = True + checksum: Optional[str] = None + + @dataclass class HasRelationMetadata(dbtClassMixin): database: Optional[str] @@ -118,3 +133,22 @@ class ParsedNode(ParsedNodeMandatory): config_call_dict: 
Dict[str, Any] = field(default_factory=dict) relation_name: Optional[str] = None raw_code: str = "" + + +@dataclass +class CompiledNode(ParsedNode): + """Contains attributes necessary for SQL files and nodes with refs, sources, etc, + so all ManifestNodes except SeedNode.""" + + language: str = "sql" + refs: List[RefArgs] = field(default_factory=list) + sources: List[List[str]] = field(default_factory=list) + metrics: List[List[str]] = field(default_factory=list) + depends_on: DependsOn = field(default_factory=DependsOn) + compiled_path: Optional[str] = None + compiled: bool = False + compiled_code: Optional[str] = None + extra_ctes_injected: bool = False + extra_ctes: List[InjectedCTE] = field(default_factory=list) + _pre_injected_sql: Optional[str] = None + contract: Contract = field(default_factory=Contract) diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index 60b0da12889..15e6461f538 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -86,15 +86,16 @@ NodeVersion, Group as GroupResource, GraphResource, - RefArgs as RefArgsResource, SavedQuery as SavedQueryResource, SemanticModel as SemanticModelResource, ParsedNodeMandatory as ParsedNodeMandatoryResource, ParsedNode as ParsedNodeResource, + CompiledNode as CompiledNodeResource, HasRelationMetadata, FileHash, NodeConfig, ColumnInfo, + InjectedCTE, ) # ===================================================================== @@ -176,13 +177,6 @@ def same_fqn(self, other) -> bool: return self.fqn == other.fqn -@dataclass -class Contract(dbtClassMixin, Replaceable): - enforced: bool = False - alias_types: bool = True - checksum: Optional[str] = None - - # Metrics, exposures, @dataclass class DeferRelation(HasRelationMetadata): @@ -378,31 +372,10 @@ def is_external_node(self): @dataclass -class InjectedCTE(dbtClassMixin, Replaceable): - """Used in CompiledNodes as part of ephemeral model processing""" - - id: str - sql: str - - -@dataclass -class 
CompiledNode(ParsedNode): +class CompiledNode(CompiledNodeResource, ParsedNode): """Contains attributes necessary for SQL files and nodes with refs, sources, etc, so all ManifestNodes except SeedNode.""" - language: str = "sql" - refs: List[RefArgsResource] = field(default_factory=list) - sources: List[List[str]] = field(default_factory=list) - metrics: List[List[str]] = field(default_factory=list) - depends_on: DependsOn = field(default_factory=DependsOn) - compiled_path: Optional[str] = None - compiled: bool = False - compiled_code: Optional[str] = None - extra_ctes_injected: bool = False - extra_ctes: List[InjectedCTE] = field(default_factory=list) - _pre_injected_sql: Optional[str] = None - contract: Contract = field(default_factory=Contract) - @property def empty(self): return not self.raw_code.strip() diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py index 8dbfc4cb3a4..61e34237e5c 100644 --- a/core/dbt/parser/base.py +++ b/core/dbt/parser/base.py @@ -12,11 +12,12 @@ generate_generate_name_macro_context, ) from dbt.adapters.factory import get_adapter # noqa: F401 +from dbt.artifacts.resources import Contract from dbt.clients.jinja import get_rendered from dbt.config import Project, RuntimeConfig from dbt.context.context_config import ContextConfig from dbt.contracts.graph.manifest import Manifest -from dbt.contracts.graph.nodes import Contract, BaseNode, ManifestNode +from dbt.contracts.graph.nodes import BaseNode, ManifestNode from dbt.contracts.graph.unparsed import Docs, UnparsedNode from dbt.exceptions import ( DbtInternalError, diff --git a/core/dbt/tests/__init__.py b/core/dbt/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/unit/test_contracts_graph_compiled.py b/tests/unit/test_contracts_graph_compiled.py index 18b2bdea7be..3b42719b2e3 100644 --- a/tests/unit/test_contracts_graph_compiled.py +++ b/tests/unit/test_contracts_graph_compiled.py @@ -11,8 +11,8 @@ ModelConfig, TestConfig, TestMetadata, 
- Contract, ) +from dbt.artifacts.resources import Contract from dbt.node_types import NodeType from .utils import ( From a8f369031cc02a9c7fc1d43a5abbe718d0fcd870 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 6 Feb 2024 18:25:12 -0500 Subject: [PATCH 07/20] AnalysisNode, HookNode, ModelNode, ModelConfig --- core/dbt/artifacts/resources/__init__.py | 3 ++ core/dbt/artifacts/resources/v1/compiled.py | 34 +++++++++++++++ core/dbt/artifacts/resources/v1/components.py | 10 +++++ core/dbt/artifacts/resources/v1/config.py | 10 ++++- core/dbt/contracts/graph/model_config.py | 10 +---- core/dbt/contracts/graph/nodes.py | 42 +++++-------------- core/dbt/parser/unit_tests.py | 3 +- tests/unit/test_contracts_graph_parsed.py | 2 +- tests/unit/test_parser.py | 3 +- 9 files changed, 73 insertions(+), 44 deletions(-) create mode 100644 core/dbt/artifacts/resources/v1/compiled.py diff --git a/core/dbt/artifacts/resources/__init__.py b/core/dbt/artifacts/resources/__init__.py index 764e2444651..36e6acae8e0 100644 --- a/core/dbt/artifacts/resources/__init__.py +++ b/core/dbt/artifacts/resources/__init__.py @@ -12,7 +12,9 @@ CompiledNode, InjectedCTE, Contract, + DeferRelation, ) +from dbt.artifacts.resources.v1.compiled import AnalysisNode, HookNode, ModelNode from dbt.artifacts.resources.v1.documentation import Documentation from dbt.artifacts.resources.v1.exposure import ( Exposure, @@ -64,4 +66,5 @@ from dbt.artifacts.resources.v1.config import ( NodeAndTestConfig, NodeConfig, + ModelConfig, ) diff --git a/core/dbt/artifacts/resources/v1/compiled.py b/core/dbt/artifacts/resources/v1/compiled.py new file mode 100644 index 00000000000..1c0803fdfea --- /dev/null +++ b/core/dbt/artifacts/resources/v1/compiled.py @@ -0,0 +1,34 @@ +from dataclasses import dataclass, field +from dbt.artifacts.resources.v1.components import ( + CompiledNode, + NodeVersion, + DeferRelation, +) +from dbt_common.contracts.constraints import ModelLevelConstraint +from dbt.artifacts.resources.v1.config 
import ModelConfig +from typing import Literal, Optional, List +from dbt.artifacts.resources.types import NodeType, AccessType +from datetime import datetime + + +@dataclass +class AnalysisNode(CompiledNode): + resource_type: Literal[NodeType.Analysis] + + +@dataclass +class HookNode(CompiledNode): + resource_type: Literal[NodeType.Operation] + index: Optional[int] = None + + +@dataclass +class ModelNode(CompiledNode): + resource_type: Literal[NodeType.Model] + access: AccessType = AccessType.Protected + config: ModelConfig = field(default_factory=ModelConfig) + constraints: List[ModelLevelConstraint] = field(default_factory=list) + version: Optional[NodeVersion] = None + latest_version: Optional[NodeVersion] = None + deprecation_date: Optional[datetime] = None + defer_relation: Optional[DeferRelation] = None diff --git a/core/dbt/artifacts/resources/v1/components.py b/core/dbt/artifacts/resources/v1/components.py index e0a79158ce1..525776e7836 100644 --- a/core/dbt/artifacts/resources/v1/components.py +++ b/core/dbt/artifacts/resources/v1/components.py @@ -106,6 +106,16 @@ def quoting_dict(self) -> Dict[str, bool]: return {} +@dataclass +class DeferRelation(HasRelationMetadata): + alias: str + relation_name: Optional[str] + + @property + def identifier(self): + return self.alias + + @dataclass class ParsedNodeMandatory(GraphResource, HasRelationMetadata): alias: str diff --git a/core/dbt/artifacts/resources/v1/config.py b/core/dbt/artifacts/resources/v1/config.py index 4323465df56..7fa8299188c 100644 --- a/core/dbt/artifacts/resources/v1/config.py +++ b/core/dbt/artifacts/resources/v1/config.py @@ -9,7 +9,7 @@ from dbt_common.contracts.config.metadata import Metadata, ShowBehavior from dbt_common.contracts.config.materialization import OnConfigurationChangeOption from dbt.artifacts.resources.base import Docs -from dbt.artifacts.resources.types import ModelHookType +from dbt.artifacts.resources.types import ModelHookType, AccessType from dbt.contracts.graph.utils 
import validate_color from dbt import hooks @@ -146,3 +146,11 @@ def __pre_deserialize__(cls, data): if key in data: data[key] = [hooks.get_hook_dict(h) for h in data[key]] return data + + +@dataclass +class ModelConfig(NodeConfig): + access: AccessType = field( + default=AccessType.Protected, + metadata=MergeBehavior.Update.meta(), + ) diff --git a/core/dbt/contracts/graph/model_config.py b/core/dbt/contracts/graph/model_config.py index 5e081c92fd4..dbe8aa4ed16 100644 --- a/core/dbt/contracts/graph/model_config.py +++ b/core/dbt/contracts/graph/model_config.py @@ -17,7 +17,7 @@ ValidationError, ) from dbt.contracts.util import Replaceable, list_str -from dbt.node_types import NodeType, AccessType +from dbt.node_types import NodeType from mashumaro.jsonschema.annotations import Pattern @@ -57,14 +57,6 @@ class SourceConfig(BaseConfig): enabled: bool = True -@dataclass -class ModelConfig(NodeConfig): - access: AccessType = field( - default=AccessType.Protected, - metadata=MergeBehavior.Update.meta(), - ) - - @dataclass class UnitTestNodeConfig(NodeConfig): expected_rows: List[Dict[str, Any]] = field(default_factory=list) diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index 15e6461f538..6f130c373ab 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -21,10 +21,7 @@ ) from dbt import deprecations -from dbt_common.contracts.constraints import ( - ConstraintType, - ModelLevelConstraint, -) +from dbt_common.contracts.constraints import ConstraintType from dbt_common.dataclass_schema import dbtClassMixin from dbt_common.clients.system import write_file @@ -63,7 +60,6 @@ ) from .model_config import ( - ModelConfig, SeedConfig, TestConfig, SourceConfig, @@ -96,6 +92,11 @@ NodeConfig, ColumnInfo, InjectedCTE, + AnalysisNode as AnalysisNodeResource, + HookNode as HookNodeResource, + ModelNode as ModelNodeResource, + DeferRelation, + ModelConfig, ) # 
===================================================================== @@ -177,17 +178,6 @@ def same_fqn(self, other) -> bool: return self.fqn == other.fqn -# Metrics, exposures, -@dataclass -class DeferRelation(HasRelationMetadata): - alias: str - relation_name: Optional[str] - - @property - def identifier(self): - return self.alias - - @dataclass class ParsedNodeMandatory( ParsedNodeMandatoryResource, GraphNode[GraphResource], HasRelationMetadata, Replaceable @@ -422,27 +412,17 @@ def depends_on_macros(self): @dataclass -class AnalysisNode(CompiledNode): - resource_type: Literal[NodeType.Analysis] +class AnalysisNode(AnalysisNodeResource, CompiledNode): + pass @dataclass -class HookNode(CompiledNode): - resource_type: Literal[NodeType.Operation] - index: Optional[int] = None +class HookNode(HookNodeResource, CompiledNode): + pass @dataclass -class ModelNode(CompiledNode): - resource_type: Literal[NodeType.Model] - access: AccessType = AccessType.Protected - config: ModelConfig = field(default_factory=ModelConfig) - constraints: List[ModelLevelConstraint] = field(default_factory=list) - version: Optional[NodeVersion] = None - latest_version: Optional[NodeVersion] = None - deprecation_date: Optional[datetime] = None - defer_relation: Optional[DeferRelation] = None - +class ModelNode(ModelNodeResource, CompiledNode): @classmethod def from_args(cls, args: ModelNodeArgs) -> "ModelNode": unique_id = args.unique_id diff --git a/core/dbt/parser/unit_tests.py b/core/dbt/parser/unit_tests.py index bb98fdb6878..918c4c85c78 100644 --- a/core/dbt/parser/unit_tests.py +++ b/core/dbt/parser/unit_tests.py @@ -14,7 +14,8 @@ from dbt.context.providers import generate_parse_exposure, get_rendered from dbt.contracts.files import FileHash, SchemaSourceFile from dbt.contracts.graph.manifest import Manifest -from dbt.contracts.graph.model_config import UnitTestNodeConfig, ModelConfig +from dbt.contracts.graph.model_config import UnitTestNodeConfig +from dbt.artifacts.resources import 
ModelConfig from dbt.contracts.graph.nodes import ( ModelNode, UnitTestNode, diff --git a/tests/unit/test_contracts_graph_parsed.py b/tests/unit/test_contracts_graph_parsed.py index 0dfc74e8909..635a493397d 100644 --- a/tests/unit/test_contracts_graph_parsed.py +++ b/tests/unit/test_contracts_graph_parsed.py @@ -18,8 +18,8 @@ ) from dbt.node_types import NodeType, AccessType from dbt.contracts.files import FileHash +from dbt.artifacts.resources import ModelConfig from dbt.contracts.graph.model_config import ( - ModelConfig, NodeConfig, SeedConfig, TestConfig, diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py index c949756eba8..89b2ca27de0 100644 --- a/tests/unit/test_parser.py +++ b/tests/unit/test_parser.py @@ -12,7 +12,8 @@ from dbt.context.context_config import ContextConfig from dbt.contracts.files import SourceFile, FileHash, FilePath, SchemaSourceFile from dbt.contracts.graph.manifest import Manifest -from dbt.contracts.graph.model_config import NodeConfig, TestConfig, SnapshotConfig, ModelConfig +from dbt.contracts.graph.model_config import NodeConfig, TestConfig, SnapshotConfig +from dbt.artifacts.resources import ModelConfig from dbt.contracts.graph.nodes import ( ModelNode, Macro, From 81479b1f7ea78ff07711d0f5bd4e0d566f6be7fe Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 6 Feb 2024 18:40:44 -0500 Subject: [PATCH 08/20] SqlNode and SeedNode --- core/dbt/artifacts/resources/__init__.py | 9 ++++++- core/dbt/artifacts/resources/v1/compiled.py | 20 ++++++++++++++- core/dbt/artifacts/resources/v1/config.py | 13 ++++++++++ core/dbt/contracts/graph/model_config.py | 20 +-------------- core/dbt/contracts/graph/nodes.py | 27 ++++----------------- tests/unit/test_contracts_graph_parsed.py | 2 +- tests/unit/test_graph_selector_methods.py | 2 +- 7 files changed, 48 insertions(+), 45 deletions(-) diff --git a/core/dbt/artifacts/resources/__init__.py b/core/dbt/artifacts/resources/__init__.py index 36e6acae8e0..a5ff68a01a0 100644 --- 
a/core/dbt/artifacts/resources/__init__.py +++ b/core/dbt/artifacts/resources/__init__.py @@ -14,7 +14,13 @@ Contract, DeferRelation, ) -from dbt.artifacts.resources.v1.compiled import AnalysisNode, HookNode, ModelNode +from dbt.artifacts.resources.v1.compiled import ( + AnalysisNode, + HookNode, + ModelNode, + SqlNode, + SeedNode, +) from dbt.artifacts.resources.v1.documentation import Documentation from dbt.artifacts.resources.v1.exposure import ( Exposure, @@ -67,4 +73,5 @@ NodeAndTestConfig, NodeConfig, ModelConfig, + SeedConfig, ) diff --git a/core/dbt/artifacts/resources/v1/compiled.py b/core/dbt/artifacts/resources/v1/compiled.py index 1c0803fdfea..795df075032 100644 --- a/core/dbt/artifacts/resources/v1/compiled.py +++ b/core/dbt/artifacts/resources/v1/compiled.py @@ -1,11 +1,13 @@ from dataclasses import dataclass, field from dbt.artifacts.resources.v1.components import ( + ParsedNode, CompiledNode, NodeVersion, DeferRelation, + MacroDependsOn, ) from dbt_common.contracts.constraints import ModelLevelConstraint -from dbt.artifacts.resources.v1.config import ModelConfig +from dbt.artifacts.resources.v1.config import ModelConfig, SeedConfig from typing import Literal, Optional, List from dbt.artifacts.resources.types import NodeType, AccessType from datetime import datetime @@ -32,3 +34,19 @@ class ModelNode(CompiledNode): latest_version: Optional[NodeVersion] = None deprecation_date: Optional[datetime] = None defer_relation: Optional[DeferRelation] = None + + +@dataclass +class SqlNode(CompiledNode): + resource_type: Literal[NodeType.SqlOperation] + + +@dataclass +class SeedNode(ParsedNode): # No SQLDefaults! 
+ resource_type: Literal[NodeType.Seed] + config: SeedConfig = field(default_factory=SeedConfig) + # seeds need the root_path because the contents are not loaded initially + # and we need the root_path to load the seed later + root_path: Optional[str] = None + depends_on: MacroDependsOn = field(default_factory=MacroDependsOn) + defer_relation: Optional[DeferRelation] = None diff --git a/core/dbt/artifacts/resources/v1/config.py b/core/dbt/artifacts/resources/v1/config.py index 7fa8299188c..ceadf5186c9 100644 --- a/core/dbt/artifacts/resources/v1/config.py +++ b/core/dbt/artifacts/resources/v1/config.py @@ -154,3 +154,16 @@ class ModelConfig(NodeConfig): default=AccessType.Protected, metadata=MergeBehavior.Update.meta(), ) + + +@dataclass +class SeedConfig(NodeConfig): + materialized: str = "seed" + delimiter: str = "," + quote_columns: Optional[bool] = None + + @classmethod + def validate(cls, data): + super().validate(data) + if data.get("materialized") and data.get("materialized") != "seed": + raise ValidationError("A seed must have a materialized value of 'seed'") diff --git a/core/dbt/contracts/graph/model_config.py b/core/dbt/contracts/graph/model_config.py index dbe8aa4ed16..4176b1388a9 100644 --- a/core/dbt/contracts/graph/model_config.py +++ b/core/dbt/contracts/graph/model_config.py @@ -9,6 +9,7 @@ SemanticModelConfig, NodeAndTestConfig, NodeConfig, + SeedConfig, ) from dbt_common.contracts.config.base import BaseConfig, MergeBehavior, CompareBehavior from dbt_common.contracts.config.metadata import Metadata, ShowBehavior @@ -39,12 +40,6 @@ class Severity(str): pass -@dataclass -class ContractConfig(dbtClassMixin, Replaceable): - enforced: bool = False - alias_types: bool = True - - @dataclass class Hook(dbtClassMixin, Replaceable): sql: str @@ -62,19 +57,6 @@ class UnitTestNodeConfig(NodeConfig): expected_rows: List[Dict[str, Any]] = field(default_factory=list) -@dataclass -class SeedConfig(NodeConfig): - materialized: str = "seed" - delimiter: str = "," 
- quote_columns: Optional[bool] = None - - @classmethod - def validate(cls, data): - super().validate(data) - if data.get("materialized") and data.get("materialized") != "seed": - raise ValidationError("A seed must have a materialized value of 'seed'") - - SEVERITY_PATTERN = r"^([Ww][Aa][Rr][Nn]|[Ee][Rr][Rr][Oo][Rr])$" diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index 6f130c373ab..12ef240d10c 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -60,7 +60,6 @@ ) from .model_config import ( - SeedConfig, TestConfig, SourceConfig, EmptySnapshotConfig, @@ -74,7 +73,6 @@ DependsOn, Docs, Exposure as ExposureResource, - MacroDependsOn, MacroArgument, Documentation as DocumentationResource, Macro as MacroResource, @@ -97,6 +95,8 @@ ModelNode as ModelNodeResource, DeferRelation, ModelConfig, + SqlNode as SqlNodeResource, + SeedNode as SeedNodeResource, ) # ===================================================================== @@ -263,8 +263,6 @@ def _deserialize(cls, dct: Dict[str, int]): return AnalysisNode.from_dict(dct) elif resource_type == "seed": return SeedNode.from_dict(dct) - elif resource_type == "rpc": - return RPCNode.from_dict(dct) elif resource_type == "sql": return SqlNode.from_dict(dct) elif resource_type == "test": @@ -699,15 +697,9 @@ def same_contract(self, old, adapter_type=None) -> bool: return False -# TODO: rm? @dataclass -class RPCNode(CompiledNode): - resource_type: Literal[NodeType.RPCCall] - - -@dataclass -class SqlNode(CompiledNode): - resource_type: Literal[NodeType.SqlOperation] +class SqlNode(SqlNodeResource, CompiledNode): + pass # ==================================== @@ -716,15 +708,7 @@ class SqlNode(CompiledNode): @dataclass -class SeedNode(ParsedNode): # No SQLDefaults! 
- resource_type: Literal[NodeType.Seed] - config: SeedConfig = field(default_factory=SeedConfig) - # seeds need the root_path because the contents are not loaded initially - # and we need the root_path to load the seed later - root_path: Optional[str] = None - depends_on: MacroDependsOn = field(default_factory=MacroDependsOn) - defer_relation: Optional[DeferRelation] = None - +class SeedNode(SeedNodeResource, ParsedNode): # No SQLDefaults! def same_seeds(self, other: "SeedNode") -> bool: # for seeds, we check the hashes. If the hashes are different types, # no match. If the hashes are both the same 'path', log a warning and @@ -1582,7 +1566,6 @@ class ParsedMacroPatch(ParsedPatch): SingularTestNode, HookNode, ModelNode, - RPCNode, SqlNode, GenericTestNode, SnapshotNode, diff --git a/tests/unit/test_contracts_graph_parsed.py b/tests/unit/test_contracts_graph_parsed.py index 635a493397d..c9c1dd70c06 100644 --- a/tests/unit/test_contracts_graph_parsed.py +++ b/tests/unit/test_contracts_graph_parsed.py @@ -15,6 +15,7 @@ MetricTypeParams, Owner, RefArgs, + MacroDependsOn, ) from dbt.node_types import NodeType, AccessType from dbt.contracts.files import FileHash @@ -40,7 +41,6 @@ Metric, SeedNode, Docs, - MacroDependsOn, SourceDefinition, Documentation, HookNode, diff --git a/tests/unit/test_graph_selector_methods.py b/tests/unit/test_graph_selector_methods.py index e1aaacc810b..a17532fcf7d 100644 --- a/tests/unit/test_graph_selector_methods.py +++ b/tests/unit/test_graph_selector_methods.py @@ -8,7 +8,6 @@ from dbt.contracts.files import FileHash from dbt.contracts.graph.nodes import ( DependsOn, - MacroDependsOn, NodeConfig, Macro, ModelNode, @@ -35,6 +34,7 @@ NodeRelation, Owner, QueryParams, + MacroDependsOn, ) from dbt.contracts.graph.unparsed import ( UnitTestInputFixture, From e4d554f3ce530cfcb79a549e95f64b66a2b5b9ed Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Wed, 7 Feb 2024 10:22:57 -0500 Subject: [PATCH 09/20] Move SingularTestNode and TestConfig --- 
core/dbt/artifacts/resources/__init__.py | 2 + core/dbt/artifacts/resources/v1/compiled.py | 10 +- core/dbt/artifacts/resources/v1/config.py | 107 +++++++++++++++++++ core/dbt/contracts/graph/model_config.py | 109 +------------------- core/dbt/contracts/graph/nodes.py | 10 +- 5 files changed, 122 insertions(+), 116 deletions(-) diff --git a/core/dbt/artifacts/resources/__init__.py b/core/dbt/artifacts/resources/__init__.py index a5ff68a01a0..227cd6a1f0c 100644 --- a/core/dbt/artifacts/resources/__init__.py +++ b/core/dbt/artifacts/resources/__init__.py @@ -20,6 +20,7 @@ ModelNode, SqlNode, SeedNode, + SingularTestNode, ) from dbt.artifacts.resources.v1.documentation import Documentation from dbt.artifacts.resources.v1.exposure import ( @@ -74,4 +75,5 @@ NodeConfig, ModelConfig, SeedConfig, + TestConfig, ) diff --git a/core/dbt/artifacts/resources/v1/compiled.py b/core/dbt/artifacts/resources/v1/compiled.py index 795df075032..a1b00135597 100644 --- a/core/dbt/artifacts/resources/v1/compiled.py +++ b/core/dbt/artifacts/resources/v1/compiled.py @@ -7,7 +7,7 @@ MacroDependsOn, ) from dbt_common.contracts.constraints import ModelLevelConstraint -from dbt.artifacts.resources.v1.config import ModelConfig, SeedConfig +from dbt.artifacts.resources.v1.config import ModelConfig, SeedConfig, TestConfig from typing import Literal, Optional, List from dbt.artifacts.resources.types import NodeType, AccessType from datetime import datetime @@ -50,3 +50,11 @@ class SeedNode(ParsedNode): # No SQLDefaults! root_path: Optional[str] = None depends_on: MacroDependsOn = field(default_factory=MacroDependsOn) defer_relation: Optional[DeferRelation] = None + + +@dataclass +class SingularTestNode(CompiledNode): + resource_type: Literal[NodeType.Test] + # Was not able to make mypy happy and keep the code working. We need to + # refactor the various configs. 
+ config: TestConfig = field(default_factory=TestConfig) # type: ignore diff --git a/core/dbt/artifacts/resources/v1/config.py b/core/dbt/artifacts/resources/v1/config.py index ceadf5186c9..51d8a19d0c2 100644 --- a/core/dbt/artifacts/resources/v1/config.py +++ b/core/dbt/artifacts/resources/v1/config.py @@ -1,5 +1,6 @@ from dbt_common.dataclass_schema import dbtClassMixin, ValidationError from typing import Optional, List, Any, Dict, Union +from typing_extensions import Annotated from dataclasses import dataclass, field from dbt_common.contracts.config.base import ( BaseConfig, @@ -12,12 +13,17 @@ from dbt.artifacts.resources.types import ModelHookType, AccessType from dbt.contracts.graph.utils import validate_color from dbt import hooks +from mashumaro.jsonschema.annotations import Pattern def list_str() -> List[str]: return [] +class Severity(str): + pass + + def metas(*metas: Metadata) -> Dict[str, Any]: existing: Dict[str, Any] = {} for m in metas: @@ -167,3 +173,104 @@ def validate(cls, data): super().validate(data) if data.get("materialized") and data.get("materialized") != "seed": raise ValidationError("A seed must have a materialized value of 'seed'") + + +SEVERITY_PATTERN = r"^([Ww][Aa][Rr][Nn]|[Ee][Rr][Rr][Oo][Rr])$" + + +@dataclass +class TestConfig(NodeAndTestConfig): + __test__ = False + + # this is repeated because of a different default + schema: Optional[str] = field( + default="dbt_test__audit", + metadata=CompareBehavior.Exclude.meta(), + ) + materialized: str = "test" + # Annotated is used by mashumaro for jsonschema generation + severity: Annotated[Severity, Pattern(SEVERITY_PATTERN)] = Severity("ERROR") + store_failures: Optional[bool] = None + store_failures_as: Optional[str] = None + where: Optional[str] = None + limit: Optional[int] = None + fail_calc: str = "count(*)" + warn_if: str = "!= 0" + error_if: str = "!= 0" + + def __post_init__(self): + """ + The presence of a setting for `store_failures_as` overrides any existing setting for 
`store_failures`, + regardless of level of granularity. If `store_failures_as` is not set, then `store_failures` takes effect. + At the time of implementation, `store_failures = True` would always create a table; the user could not + configure this. Hence, if `store_failures = True` and `store_failures_as` is not specified, then it + should be set to "table" to mimic the existing functionality. + + A side effect of this overriding functionality is that `store_failures_as="view"` at the project + level cannot be turned off at the model level without setting both `store_failures_as` and + `store_failures`. The former would cascade down and override `store_failures=False`. The proposal + is to include "ephemeral" as a value for `store_failures_as`, which effectively sets + `store_failures=False`. + + The exception handling for this is tricky. If we raise an exception here, the entire run fails at + parse time. We would rather well-formed models run successfully, leaving only exceptions to be rerun + if necessary. Hence, the exception needs to be raised in the test materialization. In order to do so, + we need to make sure that we go down the `store_failures = True` route with the invalid setting for + `store_failures_as`. This results in the `.get()` defaulted to `True` below, instead of a normal + dictionary lookup as is done in the `if` block. Refer to the test materialization for the + exception that is raised as a result of an invalid value. + + The intention of this block is to behave as if `store_failures_as` is the only setting, + but still allow for backwards compatibility for `store_failures`. + See https://github.com/dbt-labs/dbt-core/issues/6914 for more information. 
+ """ + + # if `store_failures_as` is not set, it gets set by `store_failures` + # the settings below mimic existing behavior prior to `store_failures_as` + get_store_failures_as_map = { + True: "table", + False: "ephemeral", + None: None, + } + + # if `store_failures_as` is set, it dictates what `store_failures` gets set to + # the settings below overrides whatever `store_failures` is set to by the user + get_store_failures_map = { + "ephemeral": False, + "table": True, + "view": True, + } + + if self.store_failures_as is None: + self.store_failures_as = get_store_failures_as_map[self.store_failures] + else: + self.store_failures = get_store_failures_map.get(self.store_failures_as, True) + + @classmethod + def same_contents(cls, unrendered: Dict[str, Any], other: Dict[str, Any]) -> bool: + """This is like __eq__, except it explicitly checks certain fields.""" + modifiers = [ + "severity", + "where", + "limit", + "fail_calc", + "warn_if", + "error_if", + "store_failures", + "store_failures_as", + ] + + seen = set() + for _, target_name in cls._get_fields(): + key = target_name + seen.add(key) + if key in modifiers: + if not cls.compare_key(unrendered, other, key): + return False + return True + + @classmethod + def validate(cls, data): + super().validate(data) + if data.get("materialized") and data.get("materialized") != "test": + raise ValidationError("A test must have a materialized value of 'test'") diff --git a/core/dbt/contracts/graph/model_config.py b/core/dbt/contracts/graph/model_config.py index 4176b1388a9..c32a1f62f47 100644 --- a/core/dbt/contracts/graph/model_config.py +++ b/core/dbt/contracts/graph/model_config.py @@ -1,15 +1,14 @@ from dataclasses import field, dataclass from typing import Any, List, Optional, Dict, Union, Type -from typing_extensions import Annotated from dbt.artifacts.resources import ( ExposureConfig, MetricConfig, SavedQueryConfig, SemanticModelConfig, - NodeAndTestConfig, NodeConfig, SeedConfig, + TestConfig, ) from 
dbt_common.contracts.config.base import BaseConfig, MergeBehavior, CompareBehavior from dbt_common.contracts.config.metadata import Metadata, ShowBehavior @@ -19,7 +18,6 @@ ) from dbt.contracts.util import Replaceable, list_str from dbt.node_types import NodeType -from mashumaro.jsonschema.annotations import Pattern def metas(*metas: Metadata) -> Dict[str, Any]: @@ -36,10 +34,6 @@ def insensitive_patterns(*patterns: str): return "^({})$".format("|".join(lowercased)) -class Severity(str): - pass - - @dataclass class Hook(dbtClassMixin, Replaceable): sql: str @@ -57,107 +51,6 @@ class UnitTestNodeConfig(NodeConfig): expected_rows: List[Dict[str, Any]] = field(default_factory=list) -SEVERITY_PATTERN = r"^([Ww][Aa][Rr][Nn]|[Ee][Rr][Rr][Oo][Rr])$" - - -@dataclass -class TestConfig(NodeAndTestConfig): - __test__ = False - - # this is repeated because of a different default - schema: Optional[str] = field( - default="dbt_test__audit", - metadata=CompareBehavior.Exclude.meta(), - ) - materialized: str = "test" - # Annotated is used by mashumaro for jsonschema generation - severity: Annotated[Severity, Pattern(SEVERITY_PATTERN)] = Severity("ERROR") - store_failures: Optional[bool] = None - store_failures_as: Optional[str] = None - where: Optional[str] = None - limit: Optional[int] = None - fail_calc: str = "count(*)" - warn_if: str = "!= 0" - error_if: str = "!= 0" - - def __post_init__(self): - """ - The presence of a setting for `store_failures_as` overrides any existing setting for `store_failures`, - regardless of level of granularity. If `store_failures_as` is not set, then `store_failures` takes effect. - At the time of implementation, `store_failures = True` would always create a table; the user could not - configure this. Hence, if `store_failures = True` and `store_failures_as` is not specified, then it - should be set to "table" to mimic the existing functionality. 
- - A side effect of this overriding functionality is that `store_failures_as="view"` at the project - level cannot be turned off at the model level without setting both `store_failures_as` and - `store_failures`. The former would cascade down and override `store_failures=False`. The proposal - is to include "ephemeral" as a value for `store_failures_as`, which effectively sets - `store_failures=False`. - - The exception handling for this is tricky. If we raise an exception here, the entire run fails at - parse time. We would rather well-formed models run successfully, leaving only exceptions to be rerun - if necessary. Hence, the exception needs to be raised in the test materialization. In order to do so, - we need to make sure that we go down the `store_failures = True` route with the invalid setting for - `store_failures_as`. This results in the `.get()` defaulted to `True` below, instead of a normal - dictionary lookup as is done in the `if` block. Refer to the test materialization for the - exception that is raise as a result of an invalid value. - - The intention of this block is to behave as if `store_failures_as` is the only setting, - but still allow for backwards compatibility for `store_failures`. - See https://github.com/dbt-labs/dbt-core/issues/6914 for more information. 
- """ - - # if `store_failures_as` is not set, it gets set by `store_failures` - # the settings below mimic existing behavior prior to `store_failures_as` - get_store_failures_as_map = { - True: "table", - False: "ephemeral", - None: None, - } - - # if `store_failures_as` is set, it dictates what `store_failures` gets set to - # the settings below overrides whatever `store_failures` is set to by the user - get_store_failures_map = { - "ephemeral": False, - "table": True, - "view": True, - } - - if self.store_failures_as is None: - self.store_failures_as = get_store_failures_as_map[self.store_failures] - else: - self.store_failures = get_store_failures_map.get(self.store_failures_as, True) - - @classmethod - def same_contents(cls, unrendered: Dict[str, Any], other: Dict[str, Any]) -> bool: - """This is like __eq__, except it explicitly checks certain fields.""" - modifiers = [ - "severity", - "where", - "limit", - "fail_calc", - "warn_if", - "error_if", - "store_failures", - "store_failures_as", - ] - - seen = set() - for _, target_name in cls._get_fields(): - key = target_name - seen.add(key) - if key in modifiers: - if not cls.compare_key(unrendered, other, key): - return False - return True - - @classmethod - def validate(cls, data): - super().validate(data) - if data.get("materialized") and data.get("materialized") != "test": - raise ValidationError("A test must have a materialized value of 'test'") - - @dataclass class EmptySnapshotConfig(NodeConfig): materialized: str = "snapshot" diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index 12ef240d10c..36e4a49f3c4 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -60,7 +60,6 @@ ) from .model_config import ( - TestConfig, SourceConfig, EmptySnapshotConfig, SnapshotConfig, @@ -95,8 +94,10 @@ ModelNode as ModelNodeResource, DeferRelation, ModelConfig, + TestConfig, SqlNode as SqlNodeResource, SeedNode as SeedNodeResource, + SingularTestNode as 
SingularTestNodeResource, ) # ===================================================================== @@ -826,12 +827,7 @@ def is_relational(self): @dataclass -class SingularTestNode(TestShouldStoreFailures, CompiledNode): - resource_type: Literal[NodeType.Test] - # Was not able to make mypy happy and keep the code working. We need to - # refactor the various configs. - config: TestConfig = field(default_factory=TestConfig) # type: ignore - +class SingularTestNode(SingularTestNodeResource, TestShouldStoreFailures, CompiledNode): @property def test_node_type(self): return "singular" From 6029dcfd1f8728ce1388c17e9b7cb3c3d520fdbe Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Wed, 7 Feb 2024 10:42:49 -0500 Subject: [PATCH 10/20] Move GenericTestNode --- core/dbt/artifacts/resources/__init__.py | 4 ++- .../v1/{compiled.py => manifest_nodes.py} | 27 +++++++++++++++- core/dbt/contracts/graph/nodes.py | 32 ++----------------- tests/unit/test_contracts_graph_compiled.py | 4 +-- tests/unit/test_contracts_graph_parsed.py | 2 +- tests/unit/test_graph_selector_methods.py | 4 +-- 6 files changed, 35 insertions(+), 38 deletions(-) rename core/dbt/artifacts/resources/v1/{compiled.py => manifest_nodes.py} (65%) diff --git a/core/dbt/artifacts/resources/__init__.py b/core/dbt/artifacts/resources/__init__.py index 227cd6a1f0c..f12b95f5a05 100644 --- a/core/dbt/artifacts/resources/__init__.py +++ b/core/dbt/artifacts/resources/__init__.py @@ -14,13 +14,15 @@ Contract, DeferRelation, ) -from dbt.artifacts.resources.v1.compiled import ( +from dbt.artifacts.resources.v1.manifest_nodes import ( AnalysisNode, HookNode, ModelNode, SqlNode, SeedNode, SingularTestNode, + TestMetadata, + GenericTestNode, ) from dbt.artifacts.resources.v1.documentation import Documentation from dbt.artifacts.resources.v1.exposure import ( diff --git a/core/dbt/artifacts/resources/v1/compiled.py b/core/dbt/artifacts/resources/v1/manifest_nodes.py similarity index 65% rename from 
core/dbt/artifacts/resources/v1/compiled.py rename to core/dbt/artifacts/resources/v1/manifest_nodes.py index a1b00135597..5be56354f3a 100644 --- a/core/dbt/artifacts/resources/v1/compiled.py +++ b/core/dbt/artifacts/resources/v1/manifest_nodes.py @@ -1,4 +1,5 @@ from dataclasses import dataclass, field +from dbt_common.dataclass_schema import dbtClassMixin from dbt.artifacts.resources.v1.components import ( ParsedNode, CompiledNode, @@ -8,7 +9,7 @@ ) from dbt_common.contracts.constraints import ModelLevelConstraint from dbt.artifacts.resources.v1.config import ModelConfig, SeedConfig, TestConfig -from typing import Literal, Optional, List +from typing import Literal, Optional, List, Dict, Any from dbt.artifacts.resources.types import NodeType, AccessType from datetime import datetime @@ -58,3 +59,27 @@ class SingularTestNode(CompiledNode): # Was not able to make mypy happy and keep the code working. We need to # refactor the various configs. config: TestConfig = field(default_factory=TestConfig) # type: ignore + + +@dataclass +class TestMetadata(dbtClassMixin): + __test__ = False + + name: str = "test" # dummy default to allow default in GenericTestNode. Should always be set. + # kwargs are the args that are left in the test builder after + # removing configs. They are set from the test builder when + # the test node is created. + kwargs: Dict[str, Any] = field(default_factory=dict) + namespace: Optional[str] = None + + +@dataclass +class GenericTestNode(CompiledNode): + resource_type: Literal[NodeType.Test] + column_name: Optional[str] = None + file_key_name: Optional[str] = None + # Was not able to make mypy happy and keep the code working. We need to + # refactor the various configs. 
+ config: TestConfig = field(default_factory=TestConfig) # type: ignore + attached_node: Optional[str] = None + test_metadata: TestMetadata = field(default_factory=TestMetadata) diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index 36e4a49f3c4..595bec30ed7 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -22,7 +22,6 @@ from dbt import deprecations from dbt_common.contracts.constraints import ConstraintType -from dbt_common.dataclass_schema import dbtClassMixin from dbt_common.clients.system import write_file from dbt.contracts.graph.unparsed import ( @@ -94,10 +93,10 @@ ModelNode as ModelNodeResource, DeferRelation, ModelConfig, - TestConfig, SqlNode as SqlNodeResource, SeedNode as SeedNodeResource, SingularTestNode as SingularTestNodeResource, + GenericTestNode as GenericTestNodeResource, ) # ===================================================================== @@ -839,34 +838,7 @@ def test_node_type(self): @dataclass -class TestMetadata(dbtClassMixin, Replaceable): - __test__ = False - - name: str - # kwargs are the args that are left in the test builder after - # removing configs. They are set from the test builder when - # the test node is created. - kwargs: Dict[str, Any] = field(default_factory=dict) - namespace: Optional[str] = None - - -# This has to be separated out because it has no default and so -# has to be included as a superclass, not an attribute -@dataclass -class HasTestMetadata(dbtClassMixin): - test_metadata: TestMetadata - - -@dataclass -class GenericTestNode(TestShouldStoreFailures, CompiledNode, HasTestMetadata): - resource_type: Literal[NodeType.Test] - column_name: Optional[str] = None - file_key_name: Optional[str] = None - # Was not able to make mypy happy and keep the code working. We need to - # refactor the various configs. 
- config: TestConfig = field(default_factory=TestConfig) # type: ignore - attached_node: Optional[str] = None - +class GenericTestNode(GenericTestNodeResource, TestShouldStoreFailures, CompiledNode): def same_contents(self, other, adapter_type: Optional[str]) -> bool: if other is None: return False diff --git a/tests/unit/test_contracts_graph_compiled.py b/tests/unit/test_contracts_graph_compiled.py index 3b42719b2e3..c3835841190 100644 --- a/tests/unit/test_contracts_graph_compiled.py +++ b/tests/unit/test_contracts_graph_compiled.py @@ -9,10 +9,8 @@ InjectedCTE, ModelNode, ModelConfig, - TestConfig, - TestMetadata, ) -from dbt.artifacts.resources import Contract +from dbt.artifacts.resources import Contract, TestConfig, TestMetadata from dbt.node_types import NodeType from .utils import ( diff --git a/tests/unit/test_contracts_graph_parsed.py b/tests/unit/test_contracts_graph_parsed.py index c9c1dd70c06..09d34399b5c 100644 --- a/tests/unit/test_contracts_graph_parsed.py +++ b/tests/unit/test_contracts_graph_parsed.py @@ -16,6 +16,7 @@ Owner, RefArgs, MacroDependsOn, + TestMetadata, ) from dbt.node_types import NodeType, AccessType from dbt.contracts.files import FileHash @@ -44,7 +45,6 @@ SourceDefinition, Documentation, HookNode, - TestMetadata, SemanticModel, ) from dbt.contracts.graph.unparsed import ( diff --git a/tests/unit/test_graph_selector_methods.py b/tests/unit/test_graph_selector_methods.py index a17532fcf7d..1dae8cdab95 100644 --- a/tests/unit/test_graph_selector_methods.py +++ b/tests/unit/test_graph_selector_methods.py @@ -20,8 +20,6 @@ SingularTestNode, GenericTestNode, SourceDefinition, - TestConfig, - TestMetadata, ColumnInfo, AccessType, UnitTestDefinition, @@ -35,6 +33,8 @@ Owner, QueryParams, MacroDependsOn, + TestConfig, + TestMetadata, ) from dbt.contracts.graph.unparsed import ( UnitTestInputFixture, From 433c5569fc373109867fff8c9aed506cdd96f212 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Wed, 7 Feb 2024 10:56:19 -0500 Subject: [PATCH 
11/20] Move SnapshotNode and SnapshotConfig --- core/dbt/artifacts/resources/__init__.py | 2 + core/dbt/artifacts/resources/v1/config.py | 53 ++++++++++++++++++ .../artifacts/resources/v1/manifest_nodes.py | 9 +++- core/dbt/contracts/graph/manifest.py | 3 +- core/dbt/contracts/graph/model_config.py | 54 +------------------ core/dbt/contracts/graph/nodes.py | 9 ++-- 6 files changed, 68 insertions(+), 62 deletions(-) diff --git a/core/dbt/artifacts/resources/__init__.py b/core/dbt/artifacts/resources/__init__.py index f12b95f5a05..87aefb08ffb 100644 --- a/core/dbt/artifacts/resources/__init__.py +++ b/core/dbt/artifacts/resources/__init__.py @@ -23,6 +23,7 @@ SingularTestNode, TestMetadata, GenericTestNode, + SnapshotNode, ) from dbt.artifacts.resources.v1.documentation import Documentation from dbt.artifacts.resources.v1.exposure import ( @@ -78,4 +79,5 @@ ModelConfig, SeedConfig, TestConfig, + SnapshotConfig, ) diff --git a/core/dbt/artifacts/resources/v1/config.py b/core/dbt/artifacts/resources/v1/config.py index 51d8a19d0c2..822e6c3fb6f 100644 --- a/core/dbt/artifacts/resources/v1/config.py +++ b/core/dbt/artifacts/resources/v1/config.py @@ -274,3 +274,56 @@ def validate(cls, data): super().validate(data) if data.get("materialized") and data.get("materialized") != "test": raise ValidationError("A test must have a materialized value of 'test'") + + +@dataclass +class SnapshotConfig(NodeConfig): + materialized: str = "snapshot" + strategy: Optional[str] = None + unique_key: Optional[str] = None + target_schema: Optional[str] = None + target_database: Optional[str] = None + updated_at: Optional[str] = None + # Not using Optional because of serialization issues with a Union of str and List[str] + check_cols: Union[str, List[str], None] = None + + @classmethod + def validate(cls, data): + super().validate(data) + # Note: currently you can't just set these keys in schema.yml because this validation + # will fail when parsing the snapshot node. 
+ if not data.get("strategy") or not data.get("unique_key") or not data.get("target_schema"): + raise ValidationError( + "Snapshots must be configured with a 'strategy', 'unique_key', " + "and 'target_schema'." + ) + if data.get("strategy") == "check": + if not data.get("check_cols"): + raise ValidationError( + "A snapshot configured with the check strategy must " + "specify a check_cols configuration." + ) + if isinstance(data["check_cols"], str) and data["check_cols"] != "all": + raise ValidationError( + f"Invalid value for 'check_cols': {data['check_cols']}. " + "Expected 'all' or a list of strings." + ) + elif data.get("strategy") == "timestamp": + if not data.get("updated_at"): + raise ValidationError( + "A snapshot configured with the timestamp strategy " + "must specify an updated_at configuration." + ) + if data.get("check_cols"): + raise ValidationError("A 'timestamp' snapshot should not have 'check_cols'") + # If the strategy is not 'check' or 'timestamp' it's a custom strategy, + # formerly supported with GenericSnapshotConfig + + if data.get("materialized") and data.get("materialized") != "snapshot": + raise ValidationError("A snapshot must have a materialized value of 'snapshot'") + + # Called by "calculate_node_config_dict" in ContextConfigGenerator + def finalize_and_validate(self): + data = self.to_dict(omit_none=True) + self.validate(data) + return self.from_dict(data) diff --git a/core/dbt/artifacts/resources/v1/manifest_nodes.py b/core/dbt/artifacts/resources/v1/manifest_nodes.py index 5be56354f3a..1c749e4eda0 100644 --- a/core/dbt/artifacts/resources/v1/manifest_nodes.py +++ b/core/dbt/artifacts/resources/v1/manifest_nodes.py @@ -8,7 +8,7 @@ MacroDependsOn, ) from dbt_common.contracts.constraints import ModelLevelConstraint -from dbt.artifacts.resources.v1.config import ModelConfig, SeedConfig, TestConfig +from dbt.artifacts.resources.v1.config import ModelConfig, SeedConfig, TestConfig, SnapshotConfig from typing import Literal, Optional, List, 
Dict, Any from dbt.artifacts.resources.types import NodeType, AccessType from datetime import datetime @@ -83,3 +83,10 @@ class GenericTestNode(CompiledNode): config: TestConfig = field(default_factory=TestConfig) # type: ignore attached_node: Optional[str] = None test_metadata: TestMetadata = field(default_factory=TestMetadata) + + +@dataclass +class SnapshotNode(CompiledNode): + resource_type: Literal[NodeType.Snapshot] + config: SnapshotConfig + defer_relation: Optional[DeferRelation] = None diff --git a/core/dbt/contracts/graph/manifest.py b/core/dbt/contracts/graph/manifest.py index 3ffd0e55fd7..d4254882257 100644 --- a/core/dbt/contracts/graph/manifest.py +++ b/core/dbt/contracts/graph/manifest.py @@ -34,7 +34,6 @@ ManifestNode, Metric, ModelNode, - DeferRelation, ResultNode, SavedQuery, SemanticModel, @@ -46,7 +45,7 @@ from dbt.contracts.graph.unparsed import SourcePatch, UnparsedVersion # to preserve import paths -from dbt.artifacts.resources import NodeVersion +from dbt.artifacts.resources import NodeVersion, DeferRelation from dbt.artifacts.schemas.manifest import WritableManifest, ManifestMetadata, UniqueID from dbt.contracts.files import ( SourceFile, diff --git a/core/dbt/contracts/graph/model_config.py b/core/dbt/contracts/graph/model_config.py index c32a1f62f47..9ce702c1736 100644 --- a/core/dbt/contracts/graph/model_config.py +++ b/core/dbt/contracts/graph/model_config.py @@ -9,12 +9,12 @@ NodeConfig, SeedConfig, TestConfig, + SnapshotConfig, ) from dbt_common.contracts.config.base import BaseConfig, MergeBehavior, CompareBehavior from dbt_common.contracts.config.metadata import Metadata, ShowBehavior from dbt_common.dataclass_schema import ( dbtClassMixin, - ValidationError, ) from dbt.contracts.util import Replaceable, list_str from dbt.node_types import NodeType @@ -57,58 +57,6 @@ class EmptySnapshotConfig(NodeConfig): unique_key: Optional[str] = None # override NodeConfig unique_key definition -@dataclass -class 
SnapshotConfig(EmptySnapshotConfig): - strategy: Optional[str] = None - unique_key: Optional[str] = None - target_schema: Optional[str] = None - target_database: Optional[str] = None - updated_at: Optional[str] = None - # Not using Optional because of serialization issues with a Union of str and List[str] - check_cols: Union[str, List[str], None] = None - - @classmethod - def validate(cls, data): - super().validate(data) - # Note: currently you can't just set these keys in schema.yml because this validation - # will fail when parsing the snapshot node. - if not data.get("strategy") or not data.get("unique_key") or not data.get("target_schema"): - raise ValidationError( - "Snapshots must be configured with a 'strategy', 'unique_key', " - "and 'target_schema'." - ) - if data.get("strategy") == "check": - if not data.get("check_cols"): - raise ValidationError( - "A snapshot configured with the check strategy must " - "specify a check_cols configuration." - ) - if isinstance(data["check_cols"], str) and data["check_cols"] != "all": - raise ValidationError( - f"Invalid value for 'check_cols': {data['check_cols']}. " - "Expected 'all' or a list of strings." - ) - elif data.get("strategy") == "timestamp": - if not data.get("updated_at"): - raise ValidationError( - "A snapshot configured with the timestamp strategy " - "must specify an updated_at configuration." 
- ) - if data.get("check_cols"): - raise ValidationError("A 'timestamp' snapshot should not have 'check_cols'") - # If the strategy is not 'check' or 'timestamp' it's a custom strategy, - # formerly supported with GenericSnapshotConfig - - if data.get("materialized") and data.get("materialized") != "snapshot": - raise ValidationError("A snapshot must have a materialized value of 'snapshot'") - - # Called by "calculate_node_config_dict" in ContextConfigGenerator - def finalize_and_validate(self): - data = self.to_dict(omit_none=True) - self.validate(data) - return self.from_dict(data) - - @dataclass class UnitTestConfig(BaseConfig): tags: Union[str, List[str]] = field( diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index 595bec30ed7..05f9da9b9bd 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -61,7 +61,6 @@ from .model_config import ( SourceConfig, EmptySnapshotConfig, - SnapshotConfig, UnitTestConfig, UnitTestNodeConfig, ) @@ -91,12 +90,12 @@ AnalysisNode as AnalysisNodeResource, HookNode as HookNodeResource, ModelNode as ModelNodeResource, - DeferRelation, ModelConfig, SqlNode as SqlNodeResource, SeedNode as SeedNodeResource, SingularTestNode as SingularTestNodeResource, GenericTestNode as GenericTestNodeResource, + SnapshotNode as SnapshotNodeResource, ) # ===================================================================== @@ -957,10 +956,8 @@ class IntermediateSnapshotNode(CompiledNode): @dataclass -class SnapshotNode(CompiledNode): - resource_type: Literal[NodeType.Snapshot] - config: SnapshotConfig - defer_relation: Optional[DeferRelation] = None +class SnapshotNode(SnapshotNodeResource, CompiledNode): + pass # ==================================== From 4d6c698e8315deda0399a01678f06dd01fd3c9ea Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Wed, 7 Feb 2024 12:23:49 -0500 Subject: [PATCH 12/20] Changie --- .changes/unreleased/Under the Hood-20240207-122342.yaml | 6 ++++++ 1 file 
changed, 6 insertions(+) create mode 100644 .changes/unreleased/Under the Hood-20240207-122342.yaml diff --git a/.changes/unreleased/Under the Hood-20240207-122342.yaml b/.changes/unreleased/Under the Hood-20240207-122342.yaml new file mode 100644 index 00000000000..f2e4a0ed3fe --- /dev/null +++ b/.changes/unreleased/Under the Hood-20240207-122342.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Move manifest nodes to artifacts +time: 2024-02-07T12:23:42.909049-05:00 +custom: + Author: gshank + Issue: "9388" From d5e74021ef406907e49b0803ac218b0bb6080e4f Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Fri, 16 Feb 2024 15:03:34 -0500 Subject: [PATCH 13/20] Remove duplicate Hook class, use ModelConfig in config dictionary --- core/dbt/artifacts/resources/__init__.py | 1 + core/dbt/contracts/graph/model_config.py | 15 +++------------ core/dbt/task/run.py | 2 +- tests/unit/test_contracts_graph_parsed.py | 4 ++-- 4 files changed, 7 insertions(+), 15 deletions(-) diff --git a/core/dbt/artifacts/resources/__init__.py b/core/dbt/artifacts/resources/__init__.py index d1ff7488c72..e256acb91c0 100644 --- a/core/dbt/artifacts/resources/__init__.py +++ b/core/dbt/artifacts/resources/__init__.py @@ -84,6 +84,7 @@ TestConfig, SnapshotConfig, SourceConfig, + Hook, ) from dbt.artifacts.resources.v1.source_definition import ( diff --git a/core/dbt/contracts/graph/model_config.py b/core/dbt/contracts/graph/model_config.py index cd19cca2434..12753794859 100644 --- a/core/dbt/contracts/graph/model_config.py +++ b/core/dbt/contracts/graph/model_config.py @@ -11,13 +11,11 @@ TestConfig, SnapshotConfig, SourceConfig, + ModelConfig, ) from dbt_common.contracts.config.base import BaseConfig, MergeBehavior, CompareBehavior from dbt_common.contracts.config.metadata import Metadata, ShowBehavior -from dbt_common.dataclass_schema import ( - dbtClassMixin, -) -from dbt.contracts.util import Replaceable, list_str +from dbt.contracts.util import list_str from dbt.node_types import NodeType @@ 
-35,13 +33,6 @@ def insensitive_patterns(*patterns: str): return "^({})$".format("|".join(lowercased)) -@dataclass -class Hook(dbtClassMixin, Replaceable): - sql: str - transaction: bool = True - index: Optional[int] = None - - @dataclass class UnitTestNodeConfig(NodeConfig): expected_rows: List[Dict[str, Any]] = field(default_factory=list) @@ -73,7 +64,7 @@ class UnitTestConfig(BaseConfig): NodeType.Source: SourceConfig, NodeType.Seed: SeedConfig, NodeType.Test: TestConfig, - NodeType.Model: NodeConfig, + NodeType.Model: ModelConfig, NodeType.Snapshot: SnapshotConfig, NodeType.Unit: UnitTestConfig, } diff --git a/core/dbt/task/run.py b/core/dbt/task/run.py index aa8407694c1..83163c4a74e 100644 --- a/core/dbt/task/run.py +++ b/core/dbt/task/run.py @@ -17,10 +17,10 @@ from dbt.adapters.base import BaseRelation from dbt.clients.jinja import MacroGenerator from dbt.context.providers import generate_runtime_model_context -from dbt.contracts.graph.model_config import Hook from dbt.contracts.graph.nodes import HookNode, ResultNode from dbt.artifacts.schemas.results import NodeStatus, RunStatus, RunningStatus, BaseResult from dbt.artifacts.schemas.run import RunResult +from dbt.artifacts.resources import Hook from dbt.exceptions import ( CompilationError, DbtInternalError, diff --git a/tests/unit/test_contracts_graph_parsed.py b/tests/unit/test_contracts_graph_parsed.py index e4edb11fe9a..41ae5eabbfe 100644 --- a/tests/unit/test_contracts_graph_parsed.py +++ b/tests/unit/test_contracts_graph_parsed.py @@ -22,18 +22,18 @@ TestMetadata, SourceConfig, Time, + Hook, ) from dbt.artifacts.resources.types import TimePeriod from dbt.node_types import NodeType, AccessType from dbt.contracts.files import FileHash -from dbt.artifacts.resources import ModelConfig from dbt.contracts.graph.model_config import ( NodeConfig, SeedConfig, TestConfig, SnapshotConfig, EmptySnapshotConfig, - Hook, + ModelConfig, ) from dbt.contracts.graph.nodes import ( ModelNode, From 
9cedcd33a25060936cb09be95cb63a27b49413c4 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Fri, 16 Feb 2024 17:43:08 -0500 Subject: [PATCH 14/20] Remove Node from the names of a bunch of resources --- core/dbt/artifacts/resources/__init__.py | 14 +++++----- .../artifacts/resources/v1/manifest_nodes.py | 14 +++++----- core/dbt/contracts/graph/nodes.py | 28 +++++++++---------- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/core/dbt/artifacts/resources/__init__.py b/core/dbt/artifacts/resources/__init__.py index e256acb91c0..4f71ef04eea 100644 --- a/core/dbt/artifacts/resources/__init__.py +++ b/core/dbt/artifacts/resources/__init__.py @@ -18,15 +18,15 @@ Time, ) from dbt.artifacts.resources.v1.manifest_nodes import ( - AnalysisNode, + Analysis, HookNode, - ModelNode, - SqlNode, - SeedNode, - SingularTestNode, + Model, + SqlOperation, + Seed, + SingularTest, TestMetadata, - GenericTestNode, - SnapshotNode, + GenericTest, + Snapshot, ) from dbt.artifacts.resources.v1.documentation import Documentation from dbt.artifacts.resources.v1.exposure import ( diff --git a/core/dbt/artifacts/resources/v1/manifest_nodes.py b/core/dbt/artifacts/resources/v1/manifest_nodes.py index 1c749e4eda0..5b1abb0362a 100644 --- a/core/dbt/artifacts/resources/v1/manifest_nodes.py +++ b/core/dbt/artifacts/resources/v1/manifest_nodes.py @@ -15,7 +15,7 @@ @dataclass -class AnalysisNode(CompiledNode): +class Analysis(CompiledNode): resource_type: Literal[NodeType.Analysis] @@ -26,7 +26,7 @@ class HookNode(CompiledNode): @dataclass -class ModelNode(CompiledNode): +class Model(CompiledNode): resource_type: Literal[NodeType.Model] access: AccessType = AccessType.Protected config: ModelConfig = field(default_factory=ModelConfig) @@ -38,12 +38,12 @@ class ModelNode(CompiledNode): @dataclass -class SqlNode(CompiledNode): +class SqlOperation(CompiledNode): resource_type: Literal[NodeType.SqlOperation] @dataclass -class SeedNode(ParsedNode): # No SQLDefaults! 
+class Seed(ParsedNode): # No SQLDefaults! resource_type: Literal[NodeType.Seed] config: SeedConfig = field(default_factory=SeedConfig) # seeds need the root_path because the contents are not loaded initially @@ -54,7 +54,7 @@ class SeedNode(ParsedNode): # No SQLDefaults! @dataclass -class SingularTestNode(CompiledNode): +class SingularTest(CompiledNode): resource_type: Literal[NodeType.Test] # Was not able to make mypy happy and keep the code working. We need to # refactor the various configs. @@ -74,7 +74,7 @@ class TestMetadata(dbtClassMixin): @dataclass -class GenericTestNode(CompiledNode): +class GenericTest(CompiledNode): resource_type: Literal[NodeType.Test] column_name: Optional[str] = None file_key_name: Optional[str] = None @@ -86,7 +86,7 @@ class GenericTestNode(CompiledNode): @dataclass -class SnapshotNode(CompiledNode): +class Snapshot(CompiledNode): resource_type: Literal[NodeType.Snapshot] config: SnapshotConfig defer_relation: Optional[DeferRelation] = None diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index 4a7f58abfa4..dd341c54d82 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -80,15 +80,15 @@ NodeConfig, ColumnInfo, InjectedCTE, - AnalysisNode as AnalysisNodeResource, + Analysis as AnalysisResource, HookNode as HookNodeResource, - ModelNode as ModelNodeResource, + Model as ModelResource, ModelConfig, - SqlNode as SqlNodeResource, - SeedNode as SeedNodeResource, - SingularTestNode as SingularTestNodeResource, - GenericTestNode as GenericTestNodeResource, - SnapshotNode as SnapshotNodeResource, + SqlOperation as SqlOperationResource, + Seed as SeedResource, + SingularTest as SingularTestResource, + GenericTest as GenericTestResource, + Snapshot as SnapshotResource, Quoting as QuotingResource, SourceDefinition as SourceDefinitionResource, ) @@ -429,7 +429,7 @@ def depends_on_macros(self): @dataclass -class AnalysisNode(AnalysisNodeResource, CompiledNode): +class 
AnalysisNode(AnalysisResource, CompiledNode): pass @@ -439,7 +439,7 @@ class HookNode(HookNodeResource, CompiledNode): @dataclass -class ModelNode(ModelNodeResource, CompiledNode): +class ModelNode(ModelResource, CompiledNode): @classmethod def from_args(cls, args: ModelNodeArgs) -> "ModelNode": unique_id = args.unique_id @@ -717,7 +717,7 @@ def same_contract(self, old, adapter_type=None) -> bool: @dataclass -class SqlNode(SqlNodeResource, CompiledNode): +class SqlNode(SqlOperationResource, CompiledNode): pass @@ -727,7 +727,7 @@ class SqlNode(SqlNodeResource, CompiledNode): @dataclass -class SeedNode(SeedNodeResource, ParsedNode): # No SQLDefaults! +class SeedNode(SeedResource, ParsedNode): # No SQLDefaults! def same_seeds(self, other: "SeedNode") -> bool: # for seeds, we check the hashes. If the hashes are different types, # no match. If the hashes are both the same 'path', log a warning and @@ -845,7 +845,7 @@ def is_relational(self): @dataclass -class SingularTestNode(SingularTestNodeResource, TestShouldStoreFailures, CompiledNode): +class SingularTestNode(SingularTestResource, TestShouldStoreFailures, CompiledNode): @property def test_node_type(self): return "singular" @@ -857,7 +857,7 @@ def test_node_type(self): @dataclass -class GenericTestNode(GenericTestNodeResource, TestShouldStoreFailures, CompiledNode): +class GenericTestNode(GenericTestResource, TestShouldStoreFailures, CompiledNode): def same_contents(self, other, adapter_type: Optional[str]) -> bool: if other is None: return False @@ -976,7 +976,7 @@ class IntermediateSnapshotNode(CompiledNode): @dataclass -class SnapshotNode(SnapshotNodeResource, CompiledNode): +class SnapshotNode(SnapshotResource, CompiledNode): pass From 5c4c52859b6f4ecc573bdeb17545570b902f61ae Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Fri, 16 Feb 2024 17:53:41 -0500 Subject: [PATCH 15/20] Rename some intermediate node classes --- core/dbt/artifacts/resources/__init__.py | 6 +++--- 
core/dbt/artifacts/resources/v1/components.py | 6 +++--- .../artifacts/resources/v1/manifest_nodes.py | 20 +++++++++---------- core/dbt/contracts/graph/nodes.py | 12 +++++------ 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/core/dbt/artifacts/resources/__init__.py b/core/dbt/artifacts/resources/__init__.py index 4f71ef04eea..aca2719ec8d 100644 --- a/core/dbt/artifacts/resources/__init__.py +++ b/core/dbt/artifacts/resources/__init__.py @@ -6,10 +6,10 @@ NodeVersion, RefArgs, HasRelationMetadata, - ParsedNodeMandatory, - ParsedNode, + ParsedResourceMandatory, + ParsedResource, ColumnInfo, - CompiledNode, + CompiledResource, InjectedCTE, Contract, DeferRelation, diff --git a/core/dbt/artifacts/resources/v1/components.py b/core/dbt/artifacts/resources/v1/components.py index 3bf95694253..692eab5fbb8 100644 --- a/core/dbt/artifacts/resources/v1/components.py +++ b/core/dbt/artifacts/resources/v1/components.py @@ -147,7 +147,7 @@ def identifier(self): @dataclass -class ParsedNodeMandatory(GraphResource, HasRelationMetadata): +class ParsedResourceMandatory(GraphResource, HasRelationMetadata): alias: str checksum: FileHash config: NodeConfig = field(default_factory=NodeConfig) @@ -158,7 +158,7 @@ def identifier(self): @dataclass -class ParsedNode(ParsedNodeMandatory): +class ParsedResource(ParsedResourceMandatory): tags: List[str] = field(default_factory=list) description: str = field(default="") columns: Dict[str, ColumnInfo] = field(default_factory=dict) @@ -176,7 +176,7 @@ class ParsedNode(ParsedNodeMandatory): @dataclass -class CompiledNode(ParsedNode): +class CompiledResource(ParsedResource): """Contains attributes necessary for SQL files and nodes with refs, sources, etc, so all ManifestNodes except SeedNode.""" diff --git a/core/dbt/artifacts/resources/v1/manifest_nodes.py b/core/dbt/artifacts/resources/v1/manifest_nodes.py index 5b1abb0362a..545b623d676 100644 --- a/core/dbt/artifacts/resources/v1/manifest_nodes.py +++ 
b/core/dbt/artifacts/resources/v1/manifest_nodes.py @@ -1,8 +1,8 @@ from dataclasses import dataclass, field from dbt_common.dataclass_schema import dbtClassMixin from dbt.artifacts.resources.v1.components import ( - ParsedNode, - CompiledNode, + ParsedResource, + CompiledResource, NodeVersion, DeferRelation, MacroDependsOn, @@ -15,18 +15,18 @@ @dataclass -class Analysis(CompiledNode): +class Analysis(CompiledResource): resource_type: Literal[NodeType.Analysis] @dataclass -class HookNode(CompiledNode): +class HookNode(CompiledResource): resource_type: Literal[NodeType.Operation] index: Optional[int] = None @dataclass -class Model(CompiledNode): +class Model(CompiledResource): resource_type: Literal[NodeType.Model] access: AccessType = AccessType.Protected config: ModelConfig = field(default_factory=ModelConfig) @@ -38,12 +38,12 @@ class Model(CompiledNode): @dataclass -class SqlOperation(CompiledNode): +class SqlOperation(CompiledResource): resource_type: Literal[NodeType.SqlOperation] @dataclass -class Seed(ParsedNode): # No SQLDefaults! +class Seed(ParsedResource): # No SQLDefaults! resource_type: Literal[NodeType.Seed] config: SeedConfig = field(default_factory=SeedConfig) # seeds need the root_path because the contents are not loaded initially @@ -54,7 +54,7 @@ class Seed(ParsedNode): # No SQLDefaults! @dataclass -class SingularTest(CompiledNode): +class SingularTest(CompiledResource): resource_type: Literal[NodeType.Test] # Was not able to make mypy happy and keep the code working. We need to # refactor the various configs. 
@@ -74,7 +74,7 @@ class TestMetadata(dbtClassMixin): @dataclass -class GenericTest(CompiledNode): +class GenericTest(CompiledResource): resource_type: Literal[NodeType.Test] column_name: Optional[str] = None file_key_name: Optional[str] = None @@ -86,7 +86,7 @@ class GenericTest(CompiledNode): @dataclass -class Snapshot(CompiledNode): +class Snapshot(CompiledResource): resource_type: Literal[NodeType.Snapshot] config: SnapshotConfig defer_relation: Optional[DeferRelation] = None diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index dd341c54d82..146f54f1a91 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -72,9 +72,9 @@ GraphResource, SavedQuery as SavedQueryResource, SemanticModel as SemanticModelResource, - ParsedNodeMandatory as ParsedNodeMandatoryResource, - ParsedNode as ParsedNodeResource, - CompiledNode as CompiledNodeResource, + ParsedResourceMandatory, + ParsedResource, + CompiledResource, HasRelationMetadata as HasRelationMetadataResource, FileHash, NodeConfig, @@ -200,7 +200,7 @@ def quoting_dict(self) -> Dict[str, bool]: @dataclass -class ParsedNodeMandatory(ParsedNodeMandatoryResource, GraphNode, HasRelationMetadata): +class ParsedNodeMandatory(ParsedResourceMandatory, GraphNode, HasRelationMetadata): pass @@ -242,7 +242,7 @@ def clear_event_status(self): @dataclass -class ParsedNode(ParsedNodeResource, NodeInfoMixin, ParsedNodeMandatory, SerializableType): +class ParsedNode(ParsedResource, NodeInfoMixin, ParsedNodeMandatory, SerializableType): def get_target_write_path(self, target_path: str, subdirectory: str): # This is called for both the "compiled" subdirectory of "target" and the "run" subdirectory if os.path.basename(self.path) == os.path.basename(self.original_file_path): @@ -379,7 +379,7 @@ def is_external_node(self): @dataclass -class CompiledNode(CompiledNodeResource, ParsedNode): +class CompiledNode(CompiledResource, ParsedNode): """Contains attributes necessary for 
SQL files and nodes with refs, sources, etc, so all ManifestNodes except SeedNode.""" From d43409a4513953a8f6098ef2c8a400c7ef7272e7 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 20 Feb 2024 10:40:19 -0500 Subject: [PATCH 16/20] Fix merge error --- core/dbt/artifacts/resources/v1/components.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/core/dbt/artifacts/resources/v1/components.py b/core/dbt/artifacts/resources/v1/components.py index 692eab5fbb8..6a131ef761d 100644 --- a/core/dbt/artifacts/resources/v1/components.py +++ b/core/dbt/artifacts/resources/v1/components.py @@ -129,11 +129,25 @@ def __bool__(self): @dataclass class HasRelationMetadata(dbtClassMixin): + database: Optional[str] + schema: str + # Can't set database to None like it ought to be # because it messes up the subclasses and default parameters # so hack it here - database: Optional[str] - schema: str + @classmethod + def __pre_deserialize__(cls, data): + data = super().__pre_deserialize__(data) + if "database" not in data: + data["database"] = None + return data + + @property + def quoting_dict(self) -> Dict[str, bool]: + if hasattr(self, "quoting"): + return self.quoting.to_dict(omit_none=True) + else: + return {} @dataclass From 3a4ead8b12b11062cc9097ee9e92444a08d78814 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 20 Feb 2024 11:40:34 -0500 Subject: [PATCH 17/20] Change NodeType.Model back to having NodeConfig --- core/dbt/contracts/graph/model_config.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/dbt/contracts/graph/model_config.py b/core/dbt/contracts/graph/model_config.py index 12753794859..50719d2be33 100644 --- a/core/dbt/contracts/graph/model_config.py +++ b/core/dbt/contracts/graph/model_config.py @@ -11,8 +11,8 @@ TestConfig, SnapshotConfig, SourceConfig, - ModelConfig, ) +from dbt.artifacts.resources import ModelConfig # noqa from dbt_common.contracts.config.base import BaseConfig, MergeBehavior, 
CompareBehavior from dbt_common.contracts.config.metadata import Metadata, ShowBehavior from dbt.contracts.util import list_str @@ -56,6 +56,8 @@ class UnitTestConfig(BaseConfig): ) +# We get weird failures if NodeType.Model below is changed +# to have ModelConfig, like you'd expect RESOURCE_TYPES: Dict[NodeType, Type[BaseConfig]] = { NodeType.Metric: MetricConfig, NodeType.SemanticModel: SemanticModelConfig, @@ -64,7 +66,7 @@ class UnitTestConfig(BaseConfig): NodeType.Source: SourceConfig, NodeType.Seed: SeedConfig, NodeType.Test: TestConfig, - NodeType.Model: ModelConfig, + NodeType.Model: NodeConfig, NodeType.Snapshot: SnapshotConfig, NodeType.Unit: UnitTestConfig, } From 82dbeacfaec3a436d4706c2b86a97132114b8123 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 20 Feb 2024 14:34:46 -0500 Subject: [PATCH 18/20] Separate out manifest nodes into individual files --- core/dbt/artifacts/resources/__init__.py | 26 +++--- core/dbt/artifacts/resources/v1/analysis.py | 9 ++ core/dbt/artifacts/resources/v1/config.py | 81 +--------------- .../artifacts/resources/v1/generic_test.py | 30 ++++++ core/dbt/artifacts/resources/v1/hook.py | 10 ++ .../artifacts/resources/v1/manifest_nodes.py | 92 ------------------- core/dbt/artifacts/resources/v1/model.py | 28 ++++++ core/dbt/artifacts/resources/v1/seed.py | 30 ++++++ .../artifacts/resources/v1/singular_test.py | 13 +++ core/dbt/artifacts/resources/v1/snapshot.py | 66 +++++++++++++ .../resources/v1/source_definition.py | 7 +- .../artifacts/resources/v1/sql_operation.py | 9 ++ 12 files changed, 213 insertions(+), 188 deletions(-) create mode 100644 core/dbt/artifacts/resources/v1/analysis.py create mode 100644 core/dbt/artifacts/resources/v1/generic_test.py create mode 100644 core/dbt/artifacts/resources/v1/hook.py delete mode 100644 core/dbt/artifacts/resources/v1/manifest_nodes.py create mode 100644 core/dbt/artifacts/resources/v1/model.py create mode 100644 core/dbt/artifacts/resources/v1/seed.py create mode 100644 
core/dbt/artifacts/resources/v1/singular_test.py create mode 100644 core/dbt/artifacts/resources/v1/snapshot.py create mode 100644 core/dbt/artifacts/resources/v1/sql_operation.py diff --git a/core/dbt/artifacts/resources/__init__.py b/core/dbt/artifacts/resources/__init__.py index aca2719ec8d..57e44e3eb5b 100644 --- a/core/dbt/artifacts/resources/__init__.py +++ b/core/dbt/artifacts/resources/__init__.py @@ -17,17 +17,16 @@ Quoting, Time, ) -from dbt.artifacts.resources.v1.manifest_nodes import ( - Analysis, - HookNode, - Model, - SqlOperation, - Seed, - SingularTest, - TestMetadata, - GenericTest, - Snapshot, -) +from dbt.artifacts.resources.v1.analysis import Analysis +from dbt.artifacts.resources.v1.hook import HookNode +from dbt.artifacts.resources.v1.model import Model, ModelConfig +from dbt.artifacts.resources.v1.sql_operation import SqlOperation +from dbt.artifacts.resources.v1.seed import Seed, SeedConfig +from dbt.artifacts.resources.v1.singular_test import SingularTest +from dbt.artifacts.resources.v1.generic_test import GenericTest, TestMetadata +from dbt.artifacts.resources.v1.snapshot import Snapshot, SnapshotConfig + + from dbt.artifacts.resources.v1.documentation import Documentation from dbt.artifacts.resources.v1.exposure import ( Exposure, @@ -79,15 +78,12 @@ from dbt.artifacts.resources.v1.config import ( NodeAndTestConfig, NodeConfig, - ModelConfig, - SeedConfig, TestConfig, - SnapshotConfig, - SourceConfig, Hook, ) from dbt.artifacts.resources.v1.source_definition import ( + SourceConfig, ExternalPartition, ExternalTable, SourceDefinition, diff --git a/core/dbt/artifacts/resources/v1/analysis.py b/core/dbt/artifacts/resources/v1/analysis.py new file mode 100644 index 00000000000..60f90e61576 --- /dev/null +++ b/core/dbt/artifacts/resources/v1/analysis.py @@ -0,0 +1,9 @@ +from dbt.artifacts.resources.v1.components import CompiledResource +from typing import Literal +from dataclasses import dataclass +from dbt.artifacts.resources.types import 
NodeType + + +@dataclass +class Analysis(CompiledResource): + resource_type: Literal[NodeType.Analysis] diff --git a/core/dbt/artifacts/resources/v1/config.py b/core/dbt/artifacts/resources/v1/config.py index 464a585e18e..d58d2ff4c5d 100644 --- a/core/dbt/artifacts/resources/v1/config.py +++ b/core/dbt/artifacts/resources/v1/config.py @@ -10,7 +10,7 @@ from dbt_common.contracts.config.metadata import Metadata, ShowBehavior from dbt_common.contracts.config.materialization import OnConfigurationChangeOption from dbt.artifacts.resources.base import Docs -from dbt.artifacts.resources.types import ModelHookType, AccessType +from dbt.artifacts.resources.types import ModelHookType from dbt.contracts.graph.utils import validate_color from dbt import hooks from mashumaro.jsonschema.annotations import Pattern @@ -154,32 +154,6 @@ def __pre_deserialize__(cls, data): return data -@dataclass -class SourceConfig(BaseConfig): - enabled: bool = True - - -@dataclass -class ModelConfig(NodeConfig): - access: AccessType = field( - default=AccessType.Protected, - metadata=MergeBehavior.Update.meta(), - ) - - -@dataclass -class SeedConfig(NodeConfig): - materialized: str = "seed" - delimiter: str = "," - quote_columns: Optional[bool] = None - - @classmethod - def validate(cls, data): - super().validate(data) - if data.get("materialized") and data.get("materialized") != "seed": - raise ValidationError("A seed must have a materialized value of 'seed'") - - SEVERITY_PATTERN = r"^([Ww][Aa][Rr][Nn]|[Ee][Rr][Rr][Oo][Rr])$" @@ -279,56 +253,3 @@ def validate(cls, data): super().validate(data) if data.get("materialized") and data.get("materialized") != "test": raise ValidationError("A test must have a materialized value of 'test'") - - -@dataclass -class SnapshotConfig(NodeConfig): - materialized: str = "snapshot" - strategy: Optional[str] = None - unique_key: Optional[str] = None - target_schema: Optional[str] = None - target_database: Optional[str] = None - updated_at: Optional[str] = None - 
# Not using Optional because of serialization issues with a Union of str and List[str] - check_cols: Union[str, List[str], None] = None - - @classmethod - def validate(cls, data): - super().validate(data) - # Note: currently you can't just set these keys in schema.yml because this validation - # will fail when parsing the snapshot node. - if not data.get("strategy") or not data.get("unique_key") or not data.get("target_schema"): - raise ValidationError( - "Snapshots must be configured with a 'strategy', 'unique_key', " - "and 'target_schema'." - ) - if data.get("strategy") == "check": - if not data.get("check_cols"): - raise ValidationError( - "A snapshot configured with the check strategy must " - "specify a check_cols configuration." - ) - if isinstance(data["check_cols"], str) and data["check_cols"] != "all": - raise ValidationError( - f"Invalid value for 'check_cols': {data['check_cols']}. " - "Expected 'all' or a list of strings." - ) - elif data.get("strategy") == "timestamp": - if not data.get("updated_at"): - raise ValidationError( - "A snapshot configured with the timestamp strategy " - "must specify an updated_at configuration." 
- ) - if data.get("check_cols"): - raise ValidationError("A 'timestamp' snapshot should not have 'check_cols'") - # If the strategy is not 'check' or 'timestamp' it's a custom strategy, - # formerly supported with GenericSnapshotConfig - - if data.get("materialized") and data.get("materialized") != "snapshot": - raise ValidationError("A snapshot must have a materialized value of 'snapshot'") - - # Called by "calculate_node_config_dict" in ContextConfigGenerator - def finalize_and_validate(self): - data = self.to_dict(omit_none=True) - self.validate(data) - return self.from_dict(data) diff --git a/core/dbt/artifacts/resources/v1/generic_test.py b/core/dbt/artifacts/resources/v1/generic_test.py new file mode 100644 index 00000000000..b24be584b3a --- /dev/null +++ b/core/dbt/artifacts/resources/v1/generic_test.py @@ -0,0 +1,30 @@ +from dataclasses import dataclass, field +from typing import Optional, Any, Dict, Literal +from dbt_common.dataclass_schema import dbtClassMixin +from dbt.artifacts.resources.types import NodeType +from dbt.artifacts.resources.v1.config import TestConfig +from dbt.artifacts.resources.v1.components import CompiledResource + + +@dataclass +class TestMetadata(dbtClassMixin): + __test__ = False + + name: str = "test" # dummy default to allow default in GenericTestNode. Should always be set. + # kwargs are the args that are left in the test builder after + # removing configs. They are set from the test builder when + # the test node is created. + kwargs: Dict[str, Any] = field(default_factory=dict) + namespace: Optional[str] = None + + +@dataclass +class GenericTest(CompiledResource): + resource_type: Literal[NodeType.Test] + column_name: Optional[str] = None + file_key_name: Optional[str] = None + # Was not able to make mypy happy and keep the code working. We need to + # refactor the various configs. 
+ config: TestConfig = field(default_factory=TestConfig) # type: ignore + attached_node: Optional[str] = None + test_metadata: TestMetadata = field(default_factory=TestMetadata) diff --git a/core/dbt/artifacts/resources/v1/hook.py b/core/dbt/artifacts/resources/v1/hook.py new file mode 100644 index 00000000000..dcfb4684c68 --- /dev/null +++ b/core/dbt/artifacts/resources/v1/hook.py @@ -0,0 +1,10 @@ +from dataclasses import dataclass +from typing import Optional, Literal +from dbt.artifacts.resources.v1.components import CompiledResource +from dbt.artifacts.resources.types import NodeType + + +@dataclass +class HookNode(CompiledResource): + resource_type: Literal[NodeType.Operation] + index: Optional[int] = None diff --git a/core/dbt/artifacts/resources/v1/manifest_nodes.py b/core/dbt/artifacts/resources/v1/manifest_nodes.py deleted file mode 100644 index 545b623d676..00000000000 --- a/core/dbt/artifacts/resources/v1/manifest_nodes.py +++ /dev/null @@ -1,92 +0,0 @@ -from dataclasses import dataclass, field -from dbt_common.dataclass_schema import dbtClassMixin -from dbt.artifacts.resources.v1.components import ( - ParsedResource, - CompiledResource, - NodeVersion, - DeferRelation, - MacroDependsOn, -) -from dbt_common.contracts.constraints import ModelLevelConstraint -from dbt.artifacts.resources.v1.config import ModelConfig, SeedConfig, TestConfig, SnapshotConfig -from typing import Literal, Optional, List, Dict, Any -from dbt.artifacts.resources.types import NodeType, AccessType -from datetime import datetime - - -@dataclass -class Analysis(CompiledResource): - resource_type: Literal[NodeType.Analysis] - - -@dataclass -class HookNode(CompiledResource): - resource_type: Literal[NodeType.Operation] - index: Optional[int] = None - - -@dataclass -class Model(CompiledResource): - resource_type: Literal[NodeType.Model] - access: AccessType = AccessType.Protected - config: ModelConfig = field(default_factory=ModelConfig) - constraints: List[ModelLevelConstraint] = 
field(default_factory=list) - version: Optional[NodeVersion] = None - latest_version: Optional[NodeVersion] = None - deprecation_date: Optional[datetime] = None - defer_relation: Optional[DeferRelation] = None - - -@dataclass -class SqlOperation(CompiledResource): - resource_type: Literal[NodeType.SqlOperation] - - -@dataclass -class Seed(ParsedResource): # No SQLDefaults! - resource_type: Literal[NodeType.Seed] - config: SeedConfig = field(default_factory=SeedConfig) - # seeds need the root_path because the contents are not loaded initially - # and we need the root_path to load the seed later - root_path: Optional[str] = None - depends_on: MacroDependsOn = field(default_factory=MacroDependsOn) - defer_relation: Optional[DeferRelation] = None - - -@dataclass -class SingularTest(CompiledResource): - resource_type: Literal[NodeType.Test] - # Was not able to make mypy happy and keep the code working. We need to - # refactor the various configs. - config: TestConfig = field(default_factory=TestConfig) # type: ignore - - -@dataclass -class TestMetadata(dbtClassMixin): - __test__ = False - - name: str = "test" # dummy default to allow default in GenericTestNode. Should always be set. - # kwargs are the args that are left in the test builder after - # removing configs. They are set from the test builder when - # the test node is created. - kwargs: Dict[str, Any] = field(default_factory=dict) - namespace: Optional[str] = None - - -@dataclass -class GenericTest(CompiledResource): - resource_type: Literal[NodeType.Test] - column_name: Optional[str] = None - file_key_name: Optional[str] = None - # Was not able to make mypy happy and keep the code working. We need to - # refactor the various configs. 
- config: TestConfig = field(default_factory=TestConfig) # type: ignore - attached_node: Optional[str] = None - test_metadata: TestMetadata = field(default_factory=TestMetadata) - - -@dataclass -class Snapshot(CompiledResource): - resource_type: Literal[NodeType.Snapshot] - config: SnapshotConfig - defer_relation: Optional[DeferRelation] = None diff --git a/core/dbt/artifacts/resources/v1/model.py b/core/dbt/artifacts/resources/v1/model.py new file mode 100644 index 00000000000..afb5edaad54 --- /dev/null +++ b/core/dbt/artifacts/resources/v1/model.py @@ -0,0 +1,28 @@ +from dataclasses import dataclass, field +from typing import Literal, Optional, List +from datetime import datetime +from dbt_common.contracts.config.base import MergeBehavior +from dbt_common.contracts.constraints import ModelLevelConstraint +from dbt.artifacts.resources.v1.config import NodeConfig +from dbt.artifacts.resources.types import AccessType, NodeType +from dbt.artifacts.resources.v1.components import DeferRelation, NodeVersion, CompiledResource + + +@dataclass +class ModelConfig(NodeConfig): + access: AccessType = field( + default=AccessType.Protected, + metadata=MergeBehavior.Clobber.meta(), + ) + + +@dataclass +class Model(CompiledResource): + resource_type: Literal[NodeType.Model] + access: AccessType = AccessType.Protected + config: ModelConfig = field(default_factory=ModelConfig) + constraints: List[ModelLevelConstraint] = field(default_factory=list) + version: Optional[NodeVersion] = None + latest_version: Optional[NodeVersion] = None + deprecation_date: Optional[datetime] = None + defer_relation: Optional[DeferRelation] = None diff --git a/core/dbt/artifacts/resources/v1/seed.py b/core/dbt/artifacts/resources/v1/seed.py new file mode 100644 index 00000000000..47a16352cf2 --- /dev/null +++ b/core/dbt/artifacts/resources/v1/seed.py @@ -0,0 +1,30 @@ +from dataclasses import dataclass, field +from typing import Optional, Literal +from dbt_common.dataclass_schema import ValidationError 
+from dbt.artifacts.resources.types import NodeType +from dbt.artifacts.resources.v1.components import MacroDependsOn, DeferRelation, ParsedResource +from dbt.artifacts.resources.v1.config import NodeConfig + + +@dataclass +class SeedConfig(NodeConfig): + materialized: str = "seed" + delimiter: str = "," + quote_columns: Optional[bool] = None + + @classmethod + def validate(cls, data): + super().validate(data) + if data.get("materialized") and data.get("materialized") != "seed": + raise ValidationError("A seed must have a materialized value of 'seed'") + + +@dataclass +class Seed(ParsedResource): # No SQLDefaults! + resource_type: Literal[NodeType.Seed] + config: SeedConfig = field(default_factory=SeedConfig) + # seeds need the root_path because the contents are not loaded initially + # and we need the root_path to load the seed later + root_path: Optional[str] = None + depends_on: MacroDependsOn = field(default_factory=MacroDependsOn) + defer_relation: Optional[DeferRelation] = None diff --git a/core/dbt/artifacts/resources/v1/singular_test.py b/core/dbt/artifacts/resources/v1/singular_test.py new file mode 100644 index 00000000000..76b47183c51 --- /dev/null +++ b/core/dbt/artifacts/resources/v1/singular_test.py @@ -0,0 +1,13 @@ +from dataclasses import dataclass, field +from typing import Literal +from dbt.artifacts.resources.types import NodeType +from dbt.artifacts.resources.v1.components import CompiledResource +from dbt.artifacts.resources.v1.config import TestConfig + + +@dataclass +class SingularTest(CompiledResource): + resource_type: Literal[NodeType.Test] + # Was not able to make mypy happy and keep the code working. We need to + # refactor the various configs. 
+ config: TestConfig = field(default_factory=TestConfig) # type: ignore diff --git a/core/dbt/artifacts/resources/v1/snapshot.py b/core/dbt/artifacts/resources/v1/snapshot.py new file mode 100644 index 00000000000..3eceb9bb1d2 --- /dev/null +++ b/core/dbt/artifacts/resources/v1/snapshot.py @@ -0,0 +1,66 @@ +from typing import Union, List, Optional, Literal +from dataclasses import dataclass +from dbt_common.dataclass_schema import ValidationError +from dbt.artifacts.resources.types import NodeType +from dbt.artifacts.resources.v1.components import CompiledResource, DeferRelation +from dbt.artifacts.resources.v1.config import NodeConfig + + +@dataclass +class SnapshotConfig(NodeConfig): + materialized: str = "snapshot" + strategy: Optional[str] = None + unique_key: Optional[str] = None + target_schema: Optional[str] = None + target_database: Optional[str] = None + updated_at: Optional[str] = None + # Not using Optional because of serialization issues with a Union of str and List[str] + check_cols: Union[str, List[str], None] = None + + @classmethod + def validate(cls, data): + super().validate(data) + # Note: currently you can't just set these keys in schema.yml because this validation + # will fail when parsing the snapshot node. + if not data.get("strategy") or not data.get("unique_key") or not data.get("target_schema"): + raise ValidationError( + "Snapshots must be configured with a 'strategy', 'unique_key', " + "and 'target_schema'." + ) + if data.get("strategy") == "check": + if not data.get("check_cols"): + raise ValidationError( + "A snapshot configured with the check strategy must " + "specify a check_cols configuration." + ) + if isinstance(data["check_cols"], str) and data["check_cols"] != "all": + raise ValidationError( + f"Invalid value for 'check_cols': {data['check_cols']}. " + "Expected 'all' or a list of strings." 
+ ) + elif data.get("strategy") == "timestamp": + if not data.get("updated_at"): + raise ValidationError( + "A snapshot configured with the timestamp strategy " + "must specify an updated_at configuration." + ) + if data.get("check_cols"): + raise ValidationError("A 'timestamp' snapshot should not have 'check_cols'") + # If the strategy is not 'check' or 'timestamp' it's a custom strategy, + # formerly supported with GenericSnapshotConfig + + if data.get("materialized") and data.get("materialized") != "snapshot": + raise ValidationError("A snapshot must have a materialized value of 'snapshot'") + + # Called by "calculate_node_config_dict" in ContextConfigGenerator + def finalize_and_validate(self): + data = self.to_dict(omit_none=True) + self.validate(data) + return self.from_dict(data) + + +@dataclass +class Snapshot(CompiledResource): + resource_type: Literal[NodeType.Snapshot] + config: SnapshotConfig + defer_relation: Optional[DeferRelation] = None diff --git a/core/dbt/artifacts/resources/v1/source_definition.py b/core/dbt/artifacts/resources/v1/source_definition.py index a562d4fa264..e5a9ab1d98e 100644 --- a/core/dbt/artifacts/resources/v1/source_definition.py +++ b/core/dbt/artifacts/resources/v1/source_definition.py @@ -9,13 +9,18 @@ HasRelationMetadata, Quoting, ) -from dbt.artifacts.resources.v1.config import SourceConfig +from dbt.artifacts.resources.v1.config import BaseConfig from dbt_common.contracts.config.properties import AdditionalPropertiesAllowed from dbt_common.contracts.util import Mergeable from dbt_common.exceptions import CompilationError from typing import Any, Dict, List, Literal, Optional, Union +@dataclass +class SourceConfig(BaseConfig): + enabled: bool = True + + @dataclass class ExternalPartition(AdditionalPropertiesAllowed): name: str = "" diff --git a/core/dbt/artifacts/resources/v1/sql_operation.py b/core/dbt/artifacts/resources/v1/sql_operation.py new file mode 100644 index 00000000000..fd8e79b21a1 --- /dev/null +++ 
b/core/dbt/artifacts/resources/v1/sql_operation.py @@ -0,0 +1,9 @@ +from dataclasses import dataclass +from typing import Literal +from dbt.artifacts.resources.types import NodeType +from dbt.artifacts.resources.v1.components import CompiledResource + + +@dataclass +class SqlOperation(CompiledResource): + resource_type: Literal[NodeType.SqlOperation] From b0646c50a3b06d3289437415839d4c768869a031 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 20 Feb 2024 15:18:56 -0500 Subject: [PATCH 19/20] Use ModelConfig in model_config type table --- core/dbt/contracts/graph/model_config.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/core/dbt/contracts/graph/model_config.py b/core/dbt/contracts/graph/model_config.py index 50719d2be33..12753794859 100644 --- a/core/dbt/contracts/graph/model_config.py +++ b/core/dbt/contracts/graph/model_config.py @@ -11,8 +11,8 @@ TestConfig, SnapshotConfig, SourceConfig, + ModelConfig, ) -from dbt.artifacts.resources import ModelConfig # noqa from dbt_common.contracts.config.base import BaseConfig, MergeBehavior, CompareBehavior from dbt_common.contracts.config.metadata import Metadata, ShowBehavior from dbt.contracts.util import list_str @@ -56,8 +56,6 @@ class UnitTestConfig(BaseConfig): ) -# We get weird failures if NodeType.Model below is changed -# to have ModelConfig, like you'd expect RESOURCE_TYPES: Dict[NodeType, Type[BaseConfig]] = { NodeType.Metric: MetricConfig, NodeType.SemanticModel: SemanticModelConfig, @@ -66,7 +64,7 @@ class UnitTestConfig(BaseConfig): NodeType.Source: SourceConfig, NodeType.Seed: SeedConfig, NodeType.Test: TestConfig, - NodeType.Model: NodeConfig, + NodeType.Model: ModelConfig, NodeType.Snapshot: SnapshotConfig, NodeType.Unit: UnitTestConfig, } From 07229e2c7b233af39f1ef64c7d88748e86fa2322 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 20 Feb 2024 15:35:38 -0500 Subject: [PATCH 20/20] validate patch.config['access'] --- core/dbt/parser/schemas.py | 8 ++++++++ 1 file 
changed, 8 insertions(+) diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index 9c67cfff665..2a4896bd2cf 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -562,6 +562,14 @@ def validate_and_rename(data): validate_and_rename(column) def patch_node_config(self, node, patch): + if "access" in patch.config: + if AccessType.is_valid(patch.config["access"]): + patch.config["access"] = AccessType(patch.config["access"]) + else: + raise InvalidAccessTypeError( + unique_id=node.unique_id, + field_value=patch.config["access"], + ) # Get the ContextConfig that's used in calculating the config # This must match the model resource_type that's being patched config = ContextConfig(