From 610a651b5fc9210ed865792a5a0e12ea34864a68 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Mon, 29 Jan 2024 15:07:47 -0800 Subject: [PATCH 01/10] Move `SemanticModel` sub dataclasses to dbt/artifacts --- core/dbt/artifacts/resources/__init__.py | 10 +++++++ .../resources/v1/semantic_model.py} | 0 core/dbt/contracts/graph/nodes.py | 29 +++++++++---------- core/dbt/contracts/graph/unparsed.py | 2 +- core/dbt/parser/schema_yaml_readers.py | 14 ++++----- tests/unit/test_contracts_graph_parsed.py | 4 ++- ..._semantic_layer_nodes_satisfy_protocols.py | 18 +++++------- tests/unit/test_semantic_models.py | 2 +- 8 files changed, 42 insertions(+), 37 deletions(-) rename core/dbt/{contracts/graph/semantic_models.py => artifacts/resources/v1/semantic_model.py} (100%) diff --git a/core/dbt/artifacts/resources/__init__.py b/core/dbt/artifacts/resources/__init__.py index 6e22c65966a..2a6c0d39c79 100644 --- a/core/dbt/artifacts/resources/__init__.py +++ b/core/dbt/artifacts/resources/__init__.py @@ -31,3 +31,13 @@ WhereFilter, WhereFilterIntersection, ) +from dbt.artifacts.resources.v1.semantic_model import ( + Defaults, + Dimension, + DimensionTypeParams, + DimensionValidityParams, + Entity, + Measure, + MeasureAggregationParameters, + NonAdditiveDimension, +) diff --git a/core/dbt/contracts/graph/semantic_models.py b/core/dbt/artifacts/resources/v1/semantic_model.py similarity index 100% rename from core/dbt/contracts/graph/semantic_models.py rename to core/dbt/artifacts/resources/v1/semantic_model.py diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index 96b95f885a4..ccb215a027b 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -17,12 +17,6 @@ from dbt_common.clients.system import write_file from dbt.contracts.files import FileHash -from dbt.contracts.graph.semantic_models import ( - Defaults, - Dimension, - Entity, - Measure, -) from dbt.contracts.graph.unparsed import ( ExposureType, 
ExternalTable, @@ -84,10 +78,14 @@ from dbt.artifacts.resources import ( BaseResource, + Defaults as DefaultsResource, DependsOn, + Dimension as DimensionResource, Docs, + Entity as EntityResource, MacroDependsOn, MacroArgument, + Measure as MeasureResource, Documentation as DocumentationResource, Macro as MacroResource, Metric as MetricResource, @@ -99,7 +97,6 @@ SourceFileMetadata as SourceFileMetadataResource, ) - # ===================================================================== # This contains the classes for all of the nodes and node-like objects # in the manifest. In the "nodes" dictionary of the manifest we find @@ -1536,10 +1533,10 @@ class SemanticModel(GraphNode): node_relation: Optional[NodeRelation] description: Optional[str] = None label: Optional[str] = None - defaults: Optional[Defaults] = None - entities: Sequence[Entity] = field(default_factory=list) - measures: Sequence[Measure] = field(default_factory=list) - dimensions: Sequence[Dimension] = field(default_factory=list) + defaults: Optional[DefaultsResource] = None + entities: Sequence[EntityResource] = field(default_factory=list) + measures: Sequence[MeasureResource] = field(default_factory=list) + dimensions: Sequence[DimensionResource] = field(default_factory=list) metadata: Optional[SourceFileMetadataResource] = None depends_on: DependsOn = field(default_factory=DependsOn) refs: List[RefArgsResource] = field(default_factory=list) @@ -1566,7 +1563,7 @@ def has_validity_dimensions(self) -> bool: return any([dim.validity_params is not None for dim in self.dimensions]) @property - def validity_start_dimension(self) -> Optional[Dimension]: + def validity_start_dimension(self) -> Optional[DimensionResource]: validity_start_dims = [ dim for dim in self.dimensions if dim.validity_params and dim.validity_params.is_start ] @@ -1575,7 +1572,7 @@ def validity_start_dimension(self) -> Optional[Dimension]: return validity_start_dims[0] @property - def validity_end_dimension(self) -> 
Optional[Dimension]: + def validity_end_dimension(self) -> Optional[DimensionResource]: validity_end_dims = [ dim for dim in self.dimensions if dim.validity_params and dim.validity_params.is_end ] @@ -1584,11 +1581,11 @@ def validity_end_dimension(self) -> Optional[Dimension]: return validity_end_dims[0] @property - def partitions(self) -> List[Dimension]: # noqa: D + def partitions(self) -> List[DimensionResource]: # noqa: D return [dim for dim in self.dimensions or [] if dim.is_partition] @property - def partition(self) -> Optional[Dimension]: + def partition(self) -> Optional[DimensionResource]: partitions = self.partitions if not partitions: return None @@ -1609,7 +1606,7 @@ def depends_on_macros(self): def checked_agg_time_dimension_for_measure( self, measure_reference: MeasureReference ) -> TimeDimensionReference: - measure: Optional[Measure] = None + measure: Optional[MeasureResource] = None for measure in self.measures: if measure.reference == measure_reference: measure = measure diff --git a/core/dbt/contracts/graph/unparsed.py b/core/dbt/contracts/graph/unparsed.py index a1475a50178..852c747dedc 100644 --- a/core/dbt/contracts/graph/unparsed.py +++ b/core/dbt/contracts/graph/unparsed.py @@ -15,7 +15,7 @@ ValidationError, ) from dbt.node_types import NodeType -from dbt.contracts.graph.semantic_models import ( +from dbt.artifacts.resources import ( Defaults, DimensionValidityParams, MeasureAggregationParameters, diff --git a/core/dbt/parser/schema_yaml_readers.py b/core/dbt/parser/schema_yaml_readers.py index 654ae5269d7..a427380ae6c 100644 --- a/core/dbt/parser/schema_yaml_readers.py +++ b/core/dbt/parser/schema_yaml_readers.py @@ -28,24 +28,22 @@ ) from dbt.artifacts.resources import ( ConversionTypeParams, + Dimension, + DimensionTypeParams, + Entity, Export, ExportConfig, + Measure, MetricConfig, MetricInput, MetricInputMeasure, MetricTimeWindow, MetricTypeParams, + NonAdditiveDimension, QueryParams, SavedQueryConfig, WhereFilter, - 
WhereFilterIntersection, -) -from dbt.contracts.graph.semantic_models import ( - Dimension, - DimensionTypeParams, - Entity, - Measure, - NonAdditiveDimension, + WhereFilterIntersection ) from dbt_common.exceptions import DbtInternalError from dbt.exceptions import YamlParseDictError, JSONValidationError diff --git a/tests/unit/test_contracts_graph_parsed.py b/tests/unit/test_contracts_graph_parsed.py index b7d01185da6..f3cf839731f 100644 --- a/tests/unit/test_contracts_graph_parsed.py +++ b/tests/unit/test_contracts_graph_parsed.py @@ -5,6 +5,9 @@ from hypothesis.strategies import builds, lists from dbt.artifacts.resources import ( + Dimension, + Entity, + Measure, MetricInputMeasure, MetricTypeParams, RefArgs, @@ -42,7 +45,6 @@ TestMetadata, SemanticModel, ) -from dbt.contracts.graph.semantic_models import Dimension, Entity, Measure from dbt.contracts.graph.unparsed import ( ExposureType, FreshnessThreshold, diff --git a/tests/unit/test_semantic_layer_nodes_satisfy_protocols.py b/tests/unit/test_semantic_layer_nodes_satisfy_protocols.py index d793b9285fb..bb4087d7b0e 100644 --- a/tests/unit/test_semantic_layer_nodes_satisfy_protocols.py +++ b/tests/unit/test_semantic_layer_nodes_satisfy_protocols.py @@ -10,23 +10,21 @@ from dbt.artifacts.resources import ( ConstantPropertyInput, ConversionTypeParams, - FileSlice, - MetricInput, - MetricInputMeasure, - MetricTimeWindow, - MetricTypeParams, - SourceFileMetadata, - WhereFilter, -) -from dbt.contracts.graph.semantic_models import ( + Defaults, Dimension, DimensionTypeParams, DimensionValidityParams, - Defaults, Entity, + FileSlice, Measure, MeasureAggregationParameters, + MetricInput, + MetricInputMeasure, + MetricTimeWindow, + MetricTypeParams, NonAdditiveDimension, + SourceFileMetadata, + WhereFilter, ) from dbt.node_types import NodeType from dbt_semantic_interfaces.protocols import ( diff --git a/tests/unit/test_semantic_models.py b/tests/unit/test_semantic_models.py index b1052eb2150..154c57d8585 100644 --- 
a/tests/unit/test_semantic_models.py +++ b/tests/unit/test_semantic_models.py @@ -2,8 +2,8 @@ from typing import List +from dbt.artifacts.resources import Dimension, Entity, Measure, Defaults from dbt.contracts.graph.nodes import SemanticModel -from dbt.contracts.graph.semantic_models import Dimension, Entity, Measure, Defaults from dbt.node_types import NodeType from dbt_semantic_interfaces.references import MeasureReference from dbt_semantic_interfaces.type_enums import AggregationType, DimensionType, EntityType From 1a32b9f4d214effefd5277b4720482ad7cee2206 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Mon, 29 Jan 2024 15:14:00 -0800 Subject: [PATCH 02/10] Move `NodeRelation` to dbt/artifacts --- core/dbt/artifacts/resources/__init__.py | 1 + core/dbt/artifacts/resources/v1/semantic_model.py | 8 ++++++++ core/dbt/contracts/graph/nodes.py | 11 ++--------- core/dbt/parser/manifest.py | 3 +-- tests/unit/test_graph_selector_methods.py | 2 +- .../test_semantic_layer_nodes_satisfy_protocols.py | 2 +- 6 files changed, 14 insertions(+), 13 deletions(-) diff --git a/core/dbt/artifacts/resources/__init__.py b/core/dbt/artifacts/resources/__init__.py index 2a6c0d39c79..6354df76e9a 100644 --- a/core/dbt/artifacts/resources/__init__.py +++ b/core/dbt/artifacts/resources/__init__.py @@ -39,5 +39,6 @@ Entity, Measure, MeasureAggregationParameters, + NodeRelation, NonAdditiveDimension, ) diff --git a/core/dbt/artifacts/resources/v1/semantic_model.py b/core/dbt/artifacts/resources/v1/semantic_model.py index 53394d02f80..15d406e0715 100644 --- a/core/dbt/artifacts/resources/v1/semantic_model.py +++ b/core/dbt/artifacts/resources/v1/semantic_model.py @@ -21,6 +21,14 @@ class Defaults(dbtClassMixin): agg_time_dimension: Optional[str] = None +@dataclass +class NodeRelation(dbtClassMixin): + alias: str + schema_name: str # TODO: Could this be called simply "schema" so we could reuse StateRelation? 
+ database: Optional[str] = None + relation_name: Optional[str] = None + + # ==================================== # Dimension objects # ==================================== diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index ccb215a027b..e8649226778 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -89,6 +89,7 @@ Documentation as DocumentationResource, Macro as MacroResource, Metric as MetricResource, + NodeRelation as NodeRelationResource, NodeVersion, Group as GroupResource, GraphResource, @@ -1519,18 +1520,10 @@ class Group(GroupResource, BaseNode): # ==================================== -@dataclass -class NodeRelation(dbtClassMixin): - alias: str - schema_name: str # TODO: Could this be called simply "schema" so we could reuse StateRelation? - database: Optional[str] = None - relation_name: Optional[str] = None - - @dataclass class SemanticModel(GraphNode): model: str - node_relation: Optional[NodeRelation] + node_relation: Optional[NodeRelationResource] description: Optional[str] = None label: Optional[str] = None defaults: Optional[DefaultsResource] = None diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index 644009a06cb..66cccfbcac0 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -106,9 +106,8 @@ ManifestNode, ResultNode, ModelNode, - NodeRelation, ) -from dbt.artifacts.resources import NodeVersion +from dbt.artifacts.resources import NodeRelation, NodeVersion from dbt.artifacts.schemas.base import Writable from dbt.exceptions import ( TargetNotFoundError, diff --git a/tests/unit/test_graph_selector_methods.py b/tests/unit/test_graph_selector_methods.py index ba7ff1c0c45..2b06a1dbdb8 100644 --- a/tests/unit/test_graph_selector_methods.py +++ b/tests/unit/test_graph_selector_methods.py @@ -15,7 +15,6 @@ Exposure, Metric, Group, - NodeRelation, SavedQuery, SeedNode, SemanticModel, @@ -32,6 +31,7 @@ from dbt.artifacts.resources 
import ( MetricInputMeasure, MetricTypeParams, + NodeRelation, QueryParams, ) from dbt.contracts.graph.unparsed import ( diff --git a/tests/unit/test_semantic_layer_nodes_satisfy_protocols.py b/tests/unit/test_semantic_layer_nodes_satisfy_protocols.py index bb4087d7b0e..e2765499355 100644 --- a/tests/unit/test_semantic_layer_nodes_satisfy_protocols.py +++ b/tests/unit/test_semantic_layer_nodes_satisfy_protocols.py @@ -3,7 +3,6 @@ from dbt.contracts.graph.nodes import ( Metric, - NodeRelation, SavedQuery, SemanticModel, ) @@ -22,6 +21,7 @@ MetricInputMeasure, MetricTimeWindow, MetricTypeParams, + NodeRelation, NonAdditiveDimension, SourceFileMetadata, WhereFilter, From 42f4d5cb88b90a28d585285ef37c82e438a3d5b7 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Mon, 29 Jan 2024 16:05:15 -0800 Subject: [PATCH 03/10] Move `SemanticModelConfig` to dbt/artifacts --- core/dbt/artifacts/resources/__init__.py | 1 + .../artifacts/resources/v1/semantic_model.py | 23 +++++++++++++++++-- core/dbt/contracts/graph/model_config.py | 19 ++++----------- core/dbt/contracts/graph/nodes.py | 2 +- .../test_semantic_model_configs.py | 3 +-- 5 files changed, 29 insertions(+), 19 deletions(-) diff --git a/core/dbt/artifacts/resources/__init__.py b/core/dbt/artifacts/resources/__init__.py index 6354df76e9a..ccaa383e086 100644 --- a/core/dbt/artifacts/resources/__init__.py +++ b/core/dbt/artifacts/resources/__init__.py @@ -41,4 +41,5 @@ MeasureAggregationParameters, NodeRelation, NonAdditiveDimension, + SemanticModelConfig, ) diff --git a/core/dbt/artifacts/resources/v1/semantic_model.py b/core/dbt/artifacts/resources/v1/semantic_model.py index 15d406e0715..6c8778fbff9 100644 --- a/core/dbt/artifacts/resources/v1/semantic_model.py +++ b/core/dbt/artifacts/resources/v1/semantic_model.py @@ -1,4 +1,5 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field +from dbt_common.contracts.config.base import BaseConfig, CompareBehavior, MergeBehavior from 
dbt_common.dataclass_schema import dbtClassMixin from dbt_semantic_interfaces.references import ( DimensionReference, @@ -13,7 +14,7 @@ TimeGranularity, ) from dbt.artifacts.resources import SourceFileMetadata -from typing import List, Optional +from typing import Any, Dict, List, Optional @dataclass @@ -133,3 +134,21 @@ class Measure(dbtClassMixin): @property def reference(self) -> MeasureReference: return MeasureReference(element_name=self.name) + + +# ==================================== +# SemanticModel final parts +# ==================================== + + +@dataclass +class SemanticModelConfig(BaseConfig): + enabled: bool = True + group: Optional[str] = field( + default=None, + metadata=CompareBehavior.Exclude.meta(), + ) + meta: Dict[str, Any] = field( + default_factory=dict, + metadata=MergeBehavior.Update.meta(), + ) diff --git a/core/dbt/contracts/graph/model_config.py b/core/dbt/contracts/graph/model_config.py index 15b721cfe9b..a2fc7801d8d 100644 --- a/core/dbt/contracts/graph/model_config.py +++ b/core/dbt/contracts/graph/model_config.py @@ -2,7 +2,11 @@ from typing import Any, List, Optional, Dict, Union, Type from typing_extensions import Annotated -from dbt.artifacts.resources import MetricConfig, SavedQueryConfig +from dbt.artifacts.resources import ( + MetricConfig, + SavedQueryConfig, + SemanticModelConfig, +) from dbt_common.contracts.config.base import BaseConfig, MergeBehavior, CompareBehavior from dbt_common.contracts.config.materialization import OnConfigurationChangeOption from dbt_common.contracts.config.metadata import Metadata, ShowBehavior @@ -49,19 +53,6 @@ class Hook(dbtClassMixin, Replaceable): index: Optional[int] = None -@dataclass -class SemanticModelConfig(BaseConfig): - enabled: bool = True - group: Optional[str] = field( - default=None, - metadata=CompareBehavior.Exclude.meta(), - ) - meta: Dict[str, Any] = field( - default_factory=dict, - metadata=MergeBehavior.Update.meta(), - ) - - @dataclass class 
ExposureConfig(BaseConfig): enabled: bool = True diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index e8649226778..d1d8f869e84 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -71,7 +71,6 @@ ExposureConfig, EmptySnapshotConfig, SnapshotConfig, - SemanticModelConfig, UnitTestConfig, UnitTestNodeConfig, ) @@ -95,6 +94,7 @@ GraphResource, RefArgs as RefArgsResource, SavedQuery as SavedQueryResource, + SemanticModelConfig, SourceFileMetadata as SourceFileMetadataResource, ) diff --git a/tests/functional/semantic_models/test_semantic_model_configs.py b/tests/functional/semantic_models/test_semantic_model_configs.py index bd74ad95edd..407fb2c3d4d 100644 --- a/tests/functional/semantic_models/test_semantic_model_configs.py +++ b/tests/functional/semantic_models/test_semantic_model_configs.py @@ -1,7 +1,6 @@ import pytest +from dbt.artifacts.resources import SemanticModelConfig from dbt.exceptions import ParsingError -from dbt.contracts.graph.model_config import SemanticModelConfig - from dbt.tests.util import run_dbt, update_config_file, get_manifest from tests.functional.semantic_models.fixtures import ( From 5e1b97c6af5136a78a8c051bccb8f5345defe938 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Mon, 29 Jan 2024 16:38:10 -0800 Subject: [PATCH 04/10] Move data portion of `SemanticModel` to dbt/artifacts --- .../Under the Hood-20240129-163800.yaml | 6 + core/dbt/artifacts/resources/__init__.py | 1 + .../artifacts/resources/v1/semantic_model.py | 109 +++++++++++++++- core/dbt/contracts/graph/nodes.py | 117 +----------------- 4 files changed, 118 insertions(+), 115 deletions(-) create mode 100644 .changes/unreleased/Under the Hood-20240129-163800.yaml diff --git a/.changes/unreleased/Under the Hood-20240129-163800.yaml b/.changes/unreleased/Under the Hood-20240129-163800.yaml new file mode 100644 index 00000000000..0e724751aae --- /dev/null +++ b/.changes/unreleased/Under the 
Hood-20240129-163800.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Move data portion of `SemanticModel` to dbt/artifacts +time: 2024-01-29T16:38:00.245253-08:00 +custom: + Author: QMalcolm + Issue: "9387" diff --git a/core/dbt/artifacts/resources/__init__.py b/core/dbt/artifacts/resources/__init__.py index ccaa383e086..80727abb4aa 100644 --- a/core/dbt/artifacts/resources/__init__.py +++ b/core/dbt/artifacts/resources/__init__.py @@ -41,5 +41,6 @@ MeasureAggregationParameters, NodeRelation, NonAdditiveDimension, + SemanticModel, SemanticModelConfig, ) diff --git a/core/dbt/artifacts/resources/v1/semantic_model.py b/core/dbt/artifacts/resources/v1/semantic_model.py index 6c8778fbff9..220e5e71936 100644 --- a/core/dbt/artifacts/resources/v1/semantic_model.py +++ b/core/dbt/artifacts/resources/v1/semantic_model.py @@ -1,10 +1,16 @@ +import time + from dataclasses import dataclass, field +from dbt.artifacts.resources.base import GraphResource +from dbt.artifacts.resources.v1.components import DependsOn, RefArgs from dbt_common.contracts.config.base import BaseConfig, CompareBehavior, MergeBehavior from dbt_common.dataclass_schema import dbtClassMixin from dbt_semantic_interfaces.references import ( DimensionReference, EntityReference, + LinkableElementReference, MeasureReference, + SemanticModelReference, TimeDimensionReference, ) from dbt_semantic_interfaces.type_enums import ( @@ -14,7 +20,7 @@ TimeGranularity, ) from dbt.artifacts.resources import SourceFileMetadata -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Sequence @dataclass @@ -152,3 +158,104 @@ class SemanticModelConfig(BaseConfig): default_factory=dict, metadata=MergeBehavior.Update.meta(), ) + + +@dataclass +class SemanticModel(GraphResource): + model: str + node_relation: Optional[NodeRelation] + description: Optional[str] = None + label: Optional[str] = None + defaults: Optional[Defaults] = None + entities: Sequence[Entity] = field(default_factory=list) 
+ measures: Sequence[Measure] = field(default_factory=list) + dimensions: Sequence[Dimension] = field(default_factory=list) + metadata: Optional[SourceFileMetadata] = None + depends_on: DependsOn = field(default_factory=DependsOn) + refs: List[RefArgs] = field(default_factory=list) + created_at: float = field(default_factory=lambda: time.time()) + config: SemanticModelConfig = field(default_factory=SemanticModelConfig) + unrendered_config: Dict[str, Any] = field(default_factory=dict) + primary_entity: Optional[str] = None + group: Optional[str] = None + + @property + def entity_references(self) -> List[LinkableElementReference]: + return [entity.reference for entity in self.entities] + + @property + def dimension_references(self) -> List[LinkableElementReference]: + return [dimension.reference for dimension in self.dimensions] + + @property + def measure_references(self) -> List[MeasureReference]: + return [measure.reference for measure in self.measures] + + @property + def has_validity_dimensions(self) -> bool: + return any([dim.validity_params is not None for dim in self.dimensions]) + + @property + def validity_start_dimension(self) -> Optional[Dimension]: + validity_start_dims = [ + dim for dim in self.dimensions if dim.validity_params and dim.validity_params.is_start + ] + if not validity_start_dims: + return None + return validity_start_dims[0] + + @property + def validity_end_dimension(self) -> Optional[Dimension]: + validity_end_dims = [ + dim for dim in self.dimensions if dim.validity_params and dim.validity_params.is_end + ] + if not validity_end_dims: + return None + return validity_end_dims[0] + + @property + def partitions(self) -> List[Dimension]: # noqa: D + return [dim for dim in self.dimensions or [] if dim.is_partition] + + @property + def partition(self) -> Optional[Dimension]: + partitions = self.partitions + if not partitions: + return None + return partitions[0] + + @property + def reference(self) -> SemanticModelReference: + return 
SemanticModelReference(semantic_model_name=self.name) + + def checked_agg_time_dimension_for_measure( + self, measure_reference: MeasureReference + ) -> TimeDimensionReference: + measure: Optional[Measure] = None + for measure in self.measures: + if measure.reference == measure_reference: + measure = measure + + assert ( + measure is not None + ), f"No measure with name ({measure_reference.element_name}) in semantic_model with name ({self.name})" + + default_agg_time_dimension = ( + self.defaults.agg_time_dimension if self.defaults is not None else None + ) + + agg_time_dimension_name = measure.agg_time_dimension or default_agg_time_dimension + assert agg_time_dimension_name is not None, ( + f"Aggregation time dimension for measure {measure.name} on semantic model {self.name} is not set! " + "To fix this either specify a default `agg_time_dimension` for the semantic model or define an " + "`agg_time_dimension` on the measure directly." + ) + return TimeDimensionReference(element_name=agg_time_dimension_name) + + @property + def primary_entity_reference(self) -> Optional[EntityReference]: + return ( + EntityReference(element_name=self.primary_entity) + if self.primary_entity is not None + else None + ) diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index d1d8f869e84..ee41f0be477 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -54,13 +54,6 @@ REFABLE_NODE_TYPES, VERSIONED_NODE_TYPES, ) -from dbt_semantic_interfaces.references import ( - EntityReference, - MeasureReference, - LinkableElementReference, - SemanticModelReference, - TimeDimensionReference, -) from .model_config import ( NodeConfig, @@ -77,25 +70,19 @@ from dbt.artifacts.resources import ( BaseResource, - Defaults as DefaultsResource, DependsOn, - Dimension as DimensionResource, Docs, - Entity as EntityResource, MacroDependsOn, MacroArgument, - Measure as MeasureResource, Documentation as DocumentationResource, Macro as 
MacroResource, Metric as MetricResource, - NodeRelation as NodeRelationResource, NodeVersion, Group as GroupResource, GraphResource, RefArgs as RefArgsResource, SavedQuery as SavedQueryResource, - SemanticModelConfig, - SourceFileMetadata as SourceFileMetadataResource, + SemanticModel as SemanticModelResource, ) # ===================================================================== @@ -1516,78 +1503,12 @@ class Group(GroupResource, BaseNode): # ==================================== -# SemanticModel and related classes +# SemanticModel node # ==================================== @dataclass -class SemanticModel(GraphNode): - model: str - node_relation: Optional[NodeRelationResource] - description: Optional[str] = None - label: Optional[str] = None - defaults: Optional[DefaultsResource] = None - entities: Sequence[EntityResource] = field(default_factory=list) - measures: Sequence[MeasureResource] = field(default_factory=list) - dimensions: Sequence[DimensionResource] = field(default_factory=list) - metadata: Optional[SourceFileMetadataResource] = None - depends_on: DependsOn = field(default_factory=DependsOn) - refs: List[RefArgsResource] = field(default_factory=list) - created_at: float = field(default_factory=lambda: time.time()) - config: SemanticModelConfig = field(default_factory=SemanticModelConfig) - unrendered_config: Dict[str, Any] = field(default_factory=dict) - primary_entity: Optional[str] = None - group: Optional[str] = None - - @property - def entity_references(self) -> List[LinkableElementReference]: - return [entity.reference for entity in self.entities] - - @property - def dimension_references(self) -> List[LinkableElementReference]: - return [dimension.reference for dimension in self.dimensions] - - @property - def measure_references(self) -> List[MeasureReference]: - return [measure.reference for measure in self.measures] - - @property - def has_validity_dimensions(self) -> bool: - return any([dim.validity_params is not None for dim in 
self.dimensions]) - - @property - def validity_start_dimension(self) -> Optional[DimensionResource]: - validity_start_dims = [ - dim for dim in self.dimensions if dim.validity_params and dim.validity_params.is_start - ] - if not validity_start_dims: - return None - return validity_start_dims[0] - - @property - def validity_end_dimension(self) -> Optional[DimensionResource]: - validity_end_dims = [ - dim for dim in self.dimensions if dim.validity_params and dim.validity_params.is_end - ] - if not validity_end_dims: - return None - return validity_end_dims[0] - - @property - def partitions(self) -> List[DimensionResource]: # noqa: D - return [dim for dim in self.dimensions or [] if dim.is_partition] - - @property - def partition(self) -> Optional[DimensionResource]: - partitions = self.partitions - if not partitions: - return None - return partitions[0] - - @property - def reference(self) -> SemanticModelReference: - return SemanticModelReference(semantic_model_name=self.name) - +class SemanticModel(GraphNode, SemanticModelResource): @property def depends_on_nodes(self): return self.depends_on.nodes @@ -1596,38 +1517,6 @@ def depends_on_nodes(self): def depends_on_macros(self): return self.depends_on.macros - def checked_agg_time_dimension_for_measure( - self, measure_reference: MeasureReference - ) -> TimeDimensionReference: - measure: Optional[MeasureResource] = None - for measure in self.measures: - if measure.reference == measure_reference: - measure = measure - - assert ( - measure is not None - ), f"No measure with name ({measure_reference.element_name}) in semantic_model with name ({self.name})" - - default_agg_time_dimension = ( - self.defaults.agg_time_dimension if self.defaults is not None else None - ) - - agg_time_dimension_name = measure.agg_time_dimension or default_agg_time_dimension - assert agg_time_dimension_name is not None, ( - f"Aggregation time dimension for measure {measure.name} on semantic model {self.name} is not set! 
" - "To fix this either specify a default `agg_time_dimension` for the semantic model or define an " - "`agg_time_dimension` on the measure directly." - ) - return TimeDimensionReference(element_name=agg_time_dimension_name) - - @property - def primary_entity_reference(self) -> Optional[EntityReference]: - return ( - EntityReference(element_name=self.primary_entity) - if self.primary_entity is not None - else None - ) - def same_model(self, old: "SemanticModel") -> bool: return self.model == old.same_model From 0365498191f8f1c78026191aa0a5f44c7e24b5c6 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Mon, 29 Jan 2024 16:41:46 -0800 Subject: [PATCH 05/10] Add contextual comments to `semantic_model.py` about DSI protocols --- core/dbt/artifacts/resources/v1/semantic_model.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/core/dbt/artifacts/resources/v1/semantic_model.py b/core/dbt/artifacts/resources/v1/semantic_model.py index 220e5e71936..b219b2bdcc8 100644 --- a/core/dbt/artifacts/resources/v1/semantic_model.py +++ b/core/dbt/artifacts/resources/v1/semantic_model.py @@ -23,6 +23,15 @@ from typing import Any, Dict, List, Optional, Sequence +""" +The classes in this file are dataclasses which are used to construct the Semantic +Model node in dbt-core. Additionally, these classes need to at a minimum support +what is specified in their protocol definitions in dbt-semantic-interfaces. 
+Their protocol definitions can be found here: +https://github.com/dbt-labs/dbt-semantic-interfaces/blob/main/dbt_semantic_interfaces/protocols/semantic_model.py +""" + + @dataclass class Defaults(dbtClassMixin): agg_time_dimension: Optional[str] = None @@ -38,6 +47,7 @@ class NodeRelation(dbtClassMixin): # ==================================== # Dimension objects +# Dimension protocols: https://github.com/dbt-labs/dbt-semantic-interfaces/blob/main/dbt_semantic_interfaces/protocols/dimension.py # ==================================== @@ -85,6 +95,7 @@ def validity_params(self) -> Optional[DimensionValidityParams]: # ==================================== # Entity objects +# Entity protocols: https://github.com/dbt-labs/dbt-semantic-interfaces/blob/main/dbt_semantic_interfaces/protocols/entity.py # ==================================== @@ -108,6 +119,7 @@ def is_linkable_entity_type(self) -> bool: # ==================================== # Measure objects +# Measure protocols: https://github.com/dbt-labs/dbt-semantic-interfaces/blob/main/dbt_semantic_interfaces/protocols/measure.py # ==================================== From 4b307daddc7ae5659156438de314eb0402fe5ba0 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Tue, 30 Jan 2024 10:26:21 -0800 Subject: [PATCH 06/10] Fixup mypy complaint --- core/dbt/parser/schema_yaml_readers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/dbt/parser/schema_yaml_readers.py b/core/dbt/parser/schema_yaml_readers.py index a427380ae6c..2b64312bf87 100644 --- a/core/dbt/parser/schema_yaml_readers.py +++ b/core/dbt/parser/schema_yaml_readers.py @@ -43,7 +43,7 @@ QueryParams, SavedQueryConfig, WhereFilter, - WhereFilterIntersection + WhereFilterIntersection, ) from dbt_common.exceptions import DbtInternalError from dbt.exceptions import YamlParseDictError, JSONValidationError From 8b1aa7710d8281f8af140ea1008b1b68525bb482 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Tue, 30 Jan 2024 10:28:17 -0800 Subject: 
[PATCH 07/10] Migrate v12 manifest to use artifact definitions of `SavedQuery`, `Metric`, and `SemanticModel` --- core/dbt/artifacts/schemas/manifest/v12/manifest.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/core/dbt/artifacts/schemas/manifest/v12/manifest.py b/core/dbt/artifacts/schemas/manifest/v12/manifest.py index d00740346e1..a598fac013e 100644 --- a/core/dbt/artifacts/schemas/manifest/v12/manifest.py +++ b/core/dbt/artifacts/schemas/manifest/v12/manifest.py @@ -9,7 +9,14 @@ get_artifact_schema_version, ) from dbt.artifacts.schemas.upgrades import upgrade_manifest_json -from dbt.artifacts.resources import Documentation, Group, Macro +from dbt.artifacts.resources import ( + Documentation, + Group, + Macro, + Metric, + SavedQuery, + SemanticModel, +) # TODO: remove usage of dbt modules other than dbt.artifacts from dbt import tracking @@ -18,9 +25,6 @@ Exposure, GraphMemberNode, ManifestNode, - Metric, - SavedQuery, - SemanticModel, SourceDefinition, UnitTestDefinition, ) From 6ff7ee733cc9c1e8ae1d961ebca82058ab03ba89 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Wed, 31 Jan 2024 12:09:56 -0800 Subject: [PATCH 08/10] Convert `SemanticModel` and `Metric` resources to full nodes in selector search In the `search` method in `selector_methods.py`, we were getting object representations from the incoming writable manifest by unique id. What we get from the writable manifest though is increasingly the `resource` (data artifact) part of the node, not the full node. This was problematic because a number of the selector processes _compare_ the old node to the new node, but the `resource` representation doesn't have the comparator methods. In this commit we dict-ify the resource and then get the full node by undictifying that. We should probably have a better built-in process on the full node objects to do this, but this will do for now.
--- core/dbt/graph/selector_methods.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/dbt/graph/selector_methods.py b/core/dbt/graph/selector_methods.py index e9a627b55b2..8ef16dd6643 100644 --- a/core/dbt/graph/selector_methods.py +++ b/core/dbt/graph/selector_methods.py @@ -737,9 +737,11 @@ def search(self, included_nodes: Set[UniqueId], selector: str) -> Iterator[Uniqu elif unique_id in manifest.exposures: previous_node = manifest.exposures[unique_id] elif unique_id in manifest.metrics: - previous_node = manifest.metrics[unique_id] + previous_node = Metric.from_dict(manifest.metrics[unique_id].to_dict()) elif unique_id in manifest.semantic_models: - previous_node = manifest.semantic_models[unique_id] + previous_node = SemanticModel.from_dict( + manifest.semantic_models[unique_id].to_dict() + ) elif unique_id in manifest.unit_tests: previous_node = manifest.unit_tests[unique_id] From e4dc50ca14f2444515cef5e82f4f65af49065e3c Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Wed, 31 Jan 2024 13:11:14 -0800 Subject: [PATCH 09/10] Add `from_resource` implementation on `BaseNode` to ease resource to node conversion We want to easily be able to create nodes from their resource counterparts. It's actually imperative that we can do so. The previous commit had a manual way to do so where needed. However, we don't want to have to put `from_dict(.to_dict())` everywhere. So here we added a `from_resource` class method to `BaseNode`. Everything that inherits from `BaseNode` thus automatically gets this functionality. HOWEVER, the implementation currently has a problem. Specifically, the type for `resource_instance` is `BaseResource`, which means that if one is calling, say, `Metric.from_resource()`, one could hand it a `SemanticModelResource` and mypy won't complain. In this case, a semi-cryptic error might get raised at runtime. 
Whether or not an error gets raised depends entirely on whether or not the dictified resource instance manages to satisfy all the required attributes of the desired node class. THIS IS VERY BAD. We should be able to solve this issue in an upcoming (hopefully next) commit, wherein we genericize `BaseNode` such that when inheriting it you declare it with a resource type. Technically a runtime error will still be possible, however any mixups should be caught by mypy on pre-commit hooks as well as PRs. --- core/dbt/contracts/graph/nodes.py | 4 ++++ core/dbt/graph/selector_methods.py | 6 ++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index ee41f0be477..9d99c975740 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -148,6 +148,10 @@ def is_ephemeral_model(self): def get_materialization(self): return self.config.materialized + @classmethod + def from_resource(cls, resource_instance: BaseResource): + return cls.from_dict(resource_instance.to_dict()) + @dataclass class GraphNode(GraphResource, BaseNode): diff --git a/core/dbt/graph/selector_methods.py b/core/dbt/graph/selector_methods.py index 8ef16dd6643..cd2c3af934f 100644 --- a/core/dbt/graph/selector_methods.py +++ b/core/dbt/graph/selector_methods.py @@ -737,11 +737,9 @@ def search(self, included_nodes: Set[UniqueId], selector: str) -> Iterator[Uniqu elif unique_id in manifest.exposures: previous_node = manifest.exposures[unique_id] elif unique_id in manifest.metrics: - previous_node = Metric.from_dict(manifest.metrics[unique_id].to_dict()) + previous_node = Metric.from_resource(manifest.metrics[unique_id]) elif unique_id in manifest.semantic_models: - previous_node = SemanticModel.from_dict( - manifest.semantic_models[unique_id].to_dict() - ) + previous_node = SemanticModel.from_resource(manifest.semantic_models[unique_id]) elif unique_id in manifest.unit_tests: previous_node = 
manifest.unit_tests[unique_id] From 32a12faf31f25608d5b63197440feb756720b87e Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Wed, 31 Jan 2024 16:09:08 -0800 Subject: [PATCH 10/10] Make `BaseNode` a generic that is defined with a `ResourceType` Turning `BaseNode` into an ABC generic allows us to say that the inheriting class can define what resource type from artifacts it should be used with. This gives us added type safety for what resource type can be passed into `from_resource` when called via `SemanticModel.from_resource(...)`, `Metric.from_resource(...)`, etc. NOTE: This only gives us type safety from mypy. If we begin ignoring mypy errors during development, we can still get into a situation with runtime errors (it's just harder to do so now). --- core/dbt/contracts/graph/nodes.py | 38 ++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index 9d99c975740..28d8bd34142 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -4,8 +4,21 @@ from dataclasses import dataclass, field import hashlib +from abc import ABC from mashumaro.types import SerializableType -from typing import Optional, Union, List, Dict, Any, Sequence, Tuple, Iterator, Literal +from typing import ( + Optional, + Union, + List, + Dict, + Any, + Sequence, + Tuple, + Iterator, + Literal, + Generic, + TypeVar, +) from dbt import deprecations from dbt_common.contracts.constraints import ( @@ -108,8 +121,11 @@ # ================================================== +ResourceTypeT = TypeVar("ResourceTypeT", bound="BaseResource") + + @dataclass -class BaseNode(BaseResource): +class BaseNode(ABC, Generic[ResourceTypeT], BaseResource): """All nodes or node-like objects in this file should have this as a base class""" @property @@ -149,12 +165,12 @@ def get_materialization(self): return self.config.materialized @classmethod - def from_resource(cls, 
resource_instance: BaseResource): + def from_resource(cls, resource_instance: ResourceTypeT): return cls.from_dict(resource_instance.to_dict()) @dataclass -class GraphNode(GraphResource, BaseNode): +class GraphNode(GraphResource, BaseNode[ResourceTypeT], Generic[ResourceTypeT]): """Nodes in the DAG. Macro and Documentation don't have fqn.""" def same_fqn(self, other) -> bool: @@ -217,7 +233,7 @@ def identifier(self): @dataclass -class ParsedNodeMandatory(GraphNode, HasRelationMetadata, Replaceable): +class ParsedNodeMandatory(GraphNode[GraphResource], HasRelationMetadata, Replaceable): alias: str checksum: FileHash config: NodeConfig = field(default_factory=NodeConfig) @@ -1012,7 +1028,7 @@ class UnitTestDefinitionMandatory: @dataclass -class UnitTestDefinition(NodeInfoMixin, GraphNode, UnitTestDefinitionMandatory): +class UnitTestDefinition(NodeInfoMixin, GraphNode[GraphResource], UnitTestDefinitionMandatory): description: str = "" overrides: Optional[UnitTestOverrides] = None depends_on: DependsOn = field(default_factory=DependsOn) @@ -1229,7 +1245,7 @@ def tests(self) -> List[TestDef]: @dataclass -class ParsedSourceMandatory(GraphNode, HasRelationMetadata): +class ParsedSourceMandatory(GraphNode[GraphResource], HasRelationMetadata): source_name: str source_description: str loader: str @@ -1362,7 +1378,7 @@ def group(self): @dataclass -class Exposure(GraphNode): +class Exposure(GraphNode[GraphResource]): type: ExposureType owner: Owner resource_type: Literal[NodeType.Exposure] @@ -1445,7 +1461,7 @@ def group(self): @dataclass -class Metric(GraphNode, MetricResource): +class Metric(GraphNode[MetricResource], MetricResource): @property def depends_on_nodes(self): return self.depends_on.nodes @@ -1512,7 +1528,7 @@ class Group(GroupResource, BaseNode): @dataclass -class SemanticModel(GraphNode, SemanticModelResource): +class SemanticModel(GraphNode[SemanticModelResource], SemanticModelResource): @property def depends_on_nodes(self): return self.depends_on.nodes @@ 
-1578,7 +1594,7 @@ def same_contents(self, old: Optional["SemanticModel"]) -> bool: @dataclass -class SavedQuery(NodeInfoMixin, GraphNode, SavedQueryResource): +class SavedQuery(NodeInfoMixin, GraphNode[SavedQueryResource], SavedQueryResource): def same_metrics(self, old: "SavedQuery") -> bool: return self.query_params.metrics == old.query_params.metrics