Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move manifest nodes to dbt/artifacts #9538

Merged
merged 25 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
33f475d
Move ParsedNodeMandatory; need to fix up a few things from core
gshank Feb 1, 2024
6d5aaeb
cleanup, remove NodeAndTestConfig and NodeConfig from model_config.py
gshank Feb 1, 2024
73b4f9b
Remove some unnecessary type: ignores
gshank Feb 1, 2024
755e41c
Move MacroDependsOn and Docs (circular reference)
gshank Feb 1, 2024
6d48ccc
Merge branch 'main' into manifest_nodes_to_artifacts
gshank Feb 6, 2024
6313fa2
ParsedNode, initial attempt
gshank Feb 6, 2024
9c28930
Move CompiledNode
gshank Feb 6, 2024
a8f3690
AnalysisNode, HookNode, ModelNode, ModelConfig
gshank Feb 6, 2024
81479b1
SqlNode and SeedNode
gshank Feb 6, 2024
e4d554f
Move SingularTestNode and TestConfig
gshank Feb 7, 2024
6029dcf
Move GenericTestNode
gshank Feb 7, 2024
433c556
Move SnapshotNode and SnapshotConfig
gshank Feb 7, 2024
4d6c698
Changie
gshank Feb 7, 2024
82a110a
Merge branch 'main' into gs/manifest_nodes_to_artifacts
gshank Feb 12, 2024
4974d13
Merge branch 'main' into gs/manifest_nodes_to_artifacts
gshank Feb 14, 2024
520f3dd
Merge branch 'main' into gs/manifest_nodes_to_artifacts
gshank Feb 16, 2024
d5e7402
Remove duplicate Hook class, use ModelConfig in config dictionary
gshank Feb 16, 2024
2fabc84
Merge branch 'main' into gs/manifest_nodes_to_artifacts
gshank Feb 16, 2024
9cedcd3
Remove Node from the names of a bunch of resources
gshank Feb 16, 2024
5c4c528
Rename some intermediate node classes
gshank Feb 16, 2024
d43409a
Fix merge error
gshank Feb 20, 2024
3a4ead8
Change NodeType.Model back to having NodeConfig
gshank Feb 20, 2024
82dbeac
Separate out manifest nodes into individual files
gshank Feb 20, 2024
b0646c5
Use ModelConfig in model_config type table
gshank Feb 20, 2024
07229e2
validate patch.config['access']
gshank Feb 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Under the Hood-20240207-122342.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Under the Hood
body: Move manifest nodes to artifacts
time: 2024-02-07T12:23:42.909049-05:00
custom:
Author: gshank
Issue: "9388"
37 changes: 30 additions & 7 deletions core/dbt/artifacts/resources/__init__.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,32 @@
from dbt.artifacts.resources.base import BaseResource, GraphResource
from dbt.artifacts.resources.base import BaseResource, GraphResource, FileHash, Docs

# alias to latest resource definitions
from dbt.artifacts.resources.v1.components import (
ColumnInfo,
DependsOn,
FreshnessThreshold,
HasRelationMetadata,
NodeVersion,
Quoting,
RefArgs,
HasRelationMetadata,
ParsedResourceMandatory,
ParsedResource,
ColumnInfo,
CompiledResource,
InjectedCTE,
Contract,
DeferRelation,
FreshnessThreshold,
Quoting,
Time,
)
from dbt.artifacts.resources.v1.analysis import Analysis
from dbt.artifacts.resources.v1.hook import HookNode
from dbt.artifacts.resources.v1.model import Model, ModelConfig
from dbt.artifacts.resources.v1.sql_operation import SqlOperation
from dbt.artifacts.resources.v1.seed import Seed, SeedConfig
from dbt.artifacts.resources.v1.singular_test import SingularTest
from dbt.artifacts.resources.v1.generic_test import GenericTest, TestMetadata
from dbt.artifacts.resources.v1.snapshot import Snapshot, SnapshotConfig


from dbt.artifacts.resources.v1.documentation import Documentation
from dbt.artifacts.resources.v1.exposure import (
Exposure,
Expand All @@ -19,7 +35,6 @@
MaturityType,
)
from dbt.artifacts.resources.v1.macro import Macro, MacroDependsOn, MacroArgument
from dbt.artifacts.resources.v1.docs import Docs
from dbt.artifacts.resources.v1.group import Group
from dbt.artifacts.resources.v1.metric import (
ConstantPropertyInput,
Expand Down Expand Up @@ -59,10 +74,18 @@
SemanticModel,
SemanticModelConfig,
)

from dbt.artifacts.resources.v1.config import (
NodeAndTestConfig,
NodeConfig,
TestConfig,
Hook,
)

from dbt.artifacts.resources.v1.source_definition import (
SourceConfig,
ExternalPartition,
ExternalTable,
SourceDefinition,
ParsedSourceMandatory,
SourceConfig,
)
49 changes: 48 additions & 1 deletion core/dbt/artifacts/resources/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from dataclasses import dataclass
from dbt_common.dataclass_schema import dbtClassMixin
from typing import List
from typing import List, Optional
import hashlib

from dbt.artifacts.resources.types import NodeType

Expand All @@ -18,3 +19,49 @@
@dataclass
class GraphResource(BaseResource):
    # Extends BaseResource with a single additional field.
    # NOTE(review): `fqn` presumably holds the parts of the resource's
    # fully-qualified name -- confirm against callers.
    fqn: List[str]


@dataclass
class FileHash(dbtClassMixin):
    # A named checksum. `name` is either a hashlib algorithm name (see
    # from_contents) or one of the two sentinel names produced by the
    # alternate constructors below: "none" (empty()) and "path" (path()).
    name: str  # the hash type name
    checksum: str  # the hashlib.hash_type().hexdigest() of the file contents

    @classmethod
    def empty(cls) -> "FileHash":
        """Return a placeholder hash: name "none" and an empty checksum.

        A "none" hash never compares equal to anything (see ``__eq__``),
        including another empty hash.
        """
        return FileHash(name="none", checksum="")

    @classmethod
    def path(cls, path: str) -> "FileHash":
        """Return a hash named "path" whose checksum is ``path`` itself."""
        return FileHash(name="path", checksum=path)

    def __eq__(self, other):
        """Return True when both hashes share a name and a checksum.

        Returns ``NotImplemented`` for operands that are not ``FileHash``
        so Python can try the reflected comparison. A hash named "none"
        is never equal to anything.
        """
        if not isinstance(other, FileHash):
            return NotImplemented

        if self.name == "none" or self.name != other.name:
            return False

        return self.checksum == other.checksum

    def compare(self, contents: str) -> bool:
        """Compare the file contents with the given hash.

        Returns False when this hash is named "none". Otherwise hashes
        ``contents`` with the algorithm named by ``self.name`` and returns
        whether the resulting digest matches ``self.checksum``.

        Raises whatever ``hashlib.new`` raises (``ValueError``) when
        ``self.name`` is not a supported algorithm, e.g. for "path" hashes.
        """
        if self.name == "none":
            return False

        # Compare digest strings. Comparing the FileHash returned by
        # from_contents() against the `checksum` str could never succeed:
        # __eq__ returns NotImplemented for non-FileHash operands, so the
        # comparison always evaluated to False.
        return self.from_contents(contents, name=self.name).checksum == self.checksum

    @classmethod
    def from_contents(cls, contents: str, name="sha256") -> "FileHash":
        """Create a file hash from the given file contents. The hash is always
        the utf-8 encoding of the contents given, because dbt only reads files
        as utf-8.
        """
        data = contents.encode("utf-8")
        checksum = hashlib.new(name, data).hexdigest()
        return cls(name=name, checksum=checksum)


@dataclass
class Docs(dbtClassMixin):
    # Two optional settings, both with defaults, so Docs() is valid.
    # NOTE(review): presumably read when rendering the documentation
    # site -- confirm against consumers.
    show: bool = True  # enabled unless explicitly turned off
    node_color: Optional[str] = None  # None means no color was specified
5 changes: 5 additions & 0 deletions core/dbt/artifacts/resources/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ class ModelLanguage(StrEnum):
sql = "sql"


class ModelHookType(StrEnum):
    # String-valued enum with one member per hook kind. The values are
    # hyphenated ("pre-hook" / "post-hook") and differ from the member names.
    PreHook = "pre-hook"
    PostHook = "post-hook"

class TimePeriod(StrEnum):
minute = "minute"
hour = "hour"
Expand Down
9 changes: 9 additions & 0 deletions core/dbt/artifacts/resources/v1/analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from dbt.artifacts.resources.v1.components import CompiledResource
from typing import Literal
from dataclasses import dataclass
from dbt.artifacts.resources.types import NodeType


@dataclass
class Analysis(CompiledResource):
    # The only declaration here pins `resource_type` to the single literal
    # NodeType.Analysis; everything else is inherited from CompiledResource.
    resource_type: Literal[NodeType.Analysis]
95 changes: 90 additions & 5 deletions core/dbt/artifacts/resources/v1/components.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,29 @@
import time
from dataclasses import dataclass, field
from datetime import timedelta
from dbt.artifacts.resources.types import TimePeriod
from dbt.artifacts.resources.v1.macro import MacroDependsOn
from dbt.artifacts.resources.base import GraphResource, FileHash, Docs
from dbt.artifacts.resources.v1.config import NodeConfig
from dbt_common.dataclass_schema import dbtClassMixin, ExtensibleDbtClassMixin
from dbt_common.contracts.config.properties import AdditionalPropertiesMixin
from dbt_common.contracts.constraints import ColumnLevelConstraint
from typing import Dict, List, Optional, Union, Any
from datetime import timedelta
from dbt.artifacts.resources.types import TimePeriod
from dbt_common.contracts.util import Mergeable
from dbt_common.dataclass_schema import dbtClassMixin, ExtensibleDbtClassMixin
from typing import Any, Dict, List, Optional, Union


NodeVersion = Union[str, float]


@dataclass
class MacroDependsOn(dbtClassMixin):
    # Macro identifiers, kept free of duplicates by add_macro().
    macros: List[str] = field(default_factory=list)

    # Membership tests on a list are linear, so registering n macros one
    # at a time costs O(n^2) overall.
    def add_macro(self, value: str):
        """Append ``value`` to ``macros`` unless it is already present."""
        if value in self.macros:
            return
        self.macros.append(value)


@dataclass
class DependsOn(MacroDependsOn):
nodes: List[str] = field(default_factory=list)
Expand Down Expand Up @@ -56,6 +68,21 @@ class ColumnInfo(AdditionalPropertiesMixin, ExtensibleDbtClassMixin):
_extra: Dict[str, Any] = field(default_factory=dict)


@dataclass
class InjectedCTE(dbtClassMixin):
    """Used in CompiledNodes as part of ephemeral model processing"""

    # Both fields are declared without defaults.
    # NOTE(review): `id` presumably identifies the node the CTE was built
    # from and `sql` holds the CTE text -- confirm against the compiler.
    id: str
    sql: str

@dataclass
class Contract(dbtClassMixin):
    # Contract settings; every field has a default, so Contract() is valid.
    enforced: bool = False  # off unless explicitly enabled
    alias_types: bool = True  # on unless explicitly disabled
    # NOTE(review): presumably a hash of the contract definition -- confirm.
    checksum: Optional[str] = None


@dataclass
class Quoting(dbtClassMixin, Mergeable):
database: Optional[bool] = None
Expand Down Expand Up @@ -121,3 +148,61 @@ def quoting_dict(self) -> Dict[str, bool]:
return self.quoting.to_dict(omit_none=True)
else:
return {}


@dataclass
class DeferRelation(HasRelationMetadata):
    # Adds two fields on top of HasRelationMetadata; both are declared
    # without a default.
    alias: str
    relation_name: Optional[str]  # may be None, but has no default value

    @property
    def identifier(self) -> str:
        """Return ``alias``; ``identifier`` is a read-only synonym for it."""
        return self.alias


@dataclass
class ParsedResourceMandatory(GraphResource, HasRelationMetadata):
    # Combines GraphResource and HasRelationMetadata and adds the fields
    # below. `alias` and `checksum` have no defaults; `config` does.
    alias: str
    checksum: FileHash
    # default_factory gives each instance its own NodeConfig object.
    config: NodeConfig = field(default_factory=NodeConfig)

    @property
    def identifier(self) -> str:
        """Return ``alias``; ``identifier`` is a read-only synonym for it."""
        return self.alias


@dataclass
class ParsedResource(ParsedResourceMandatory):
    # Every field below has a default, so a ParsedResource needs only the
    # arguments required by ParsedResourceMandatory. Field order is kept
    # exactly as before because it determines the generated __init__.

    # Descriptive properties.
    tags: List[str] = field(default_factory=list)
    description: str = ""
    columns: Dict[str, ColumnInfo] = field(default_factory=dict)
    meta: Dict[str, Any] = field(default_factory=dict)
    group: Optional[str] = None
    docs: Docs = field(default_factory=Docs)

    # File locations and processing state.
    patch_path: Optional[str] = None
    build_path: Optional[str] = None
    deferred: bool = False
    unrendered_config: Dict[str, Any] = field(default_factory=dict)
    # Evaluated per instance: each object records time.time() at creation.
    created_at: float = field(default_factory=lambda: time.time())
    config_call_dict: Dict[str, Any] = field(default_factory=dict)
    relation_name: Optional[str] = None
    raw_code: str = ""


@dataclass
class CompiledResource(ParsedResource):
    """Contains attributes necessary for SQL files and nodes with refs, sources, etc,
    so all ManifestNodes except SeedNode."""

    # All fields have defaults; list-valued ones use default_factory so
    # instances never share a list.
    language: str = "sql"  # "sql" unless another language is given
    refs: List[RefArgs] = field(default_factory=list)
    sources: List[List[str]] = field(default_factory=list)
    metrics: List[List[str]] = field(default_factory=list)
    depends_on: DependsOn = field(default_factory=DependsOn)
    # Compilation state: starts as "not compiled" with no path or code.
    compiled_path: Optional[str] = None
    compiled: bool = False
    compiled_code: Optional[str] = None
    # CTE injection state (see InjectedCTE).
    extra_ctes_injected: bool = False
    extra_ctes: List[InjectedCTE] = field(default_factory=list)
    # NOTE(review): presumably the compiled SQL as it was before CTE
    # injection -- confirm against the compiler.
    _pre_injected_sql: Optional[str] = None
    contract: Contract = field(default_factory=Contract)
Loading
Loading