Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move SemanticModel data artifacts to dbt/artifacts #9485

Merged
merged 10 commits into from
Feb 1, 2024
6 changes: 6 additions & 0 deletions .changes/unreleased/Under the Hood-20240129-163800.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Under the Hood
body: Move data portion of `SemanticModel` to dbt/artifacts
time: 2024-01-29T16:38:00.245253-08:00
custom:
Author: QMalcolm
Issue: "9387"
13 changes: 13 additions & 0 deletions core/dbt/artifacts/resources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,16 @@
WhereFilter,
WhereFilterIntersection,
)
from dbt.artifacts.resources.v1.semantic_model import (
Defaults,
Dimension,
DimensionTypeParams,
DimensionValidityParams,
Entity,
Measure,
MeasureAggregationParameters,
NodeRelation,
NonAdditiveDimension,
SemanticModel,
SemanticModelConfig,
)
273 changes: 273 additions & 0 deletions core/dbt/artifacts/resources/v1/semantic_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
import time

from dataclasses import dataclass, field
from dbt.artifacts.resources.base import GraphResource
from dbt.artifacts.resources.v1.components import DependsOn, RefArgs
from dbt_common.contracts.config.base import BaseConfig, CompareBehavior, MergeBehavior
from dbt_common.dataclass_schema import dbtClassMixin
from dbt_semantic_interfaces.references import (
DimensionReference,
EntityReference,
LinkableElementReference,
MeasureReference,
SemanticModelReference,
TimeDimensionReference,
)
from dbt_semantic_interfaces.type_enums import (
AggregationType,
DimensionType,
EntityType,
TimeGranularity,
)
from dbt.artifacts.resources import SourceFileMetadata
from typing import Any, Dict, List, Optional, Sequence


"""
The classes in this file are dataclasses which are used to construct the Semantic
Model node in dbt-core. Additionally, these classes need to at a minimum support
what is specified in their protocol definitions in dbt-semantic-interfaces.
Their protocol definitions can be found here:
https://github.com/dbt-labs/dbt-semantic-interfaces/blob/main/dbt_semantic_interfaces/protocols/semantic_model.py
"""


@dataclass
class Defaults(dbtClassMixin):
agg_time_dimension: Optional[str] = None


@dataclass
class NodeRelation(dbtClassMixin):
alias: str
schema_name: str # TODO: Could this be called simply "schema" so we could reuse StateRelation?
database: Optional[str] = None
relation_name: Optional[str] = None


# ====================================
# Dimension objects
# Dimension protocols: https://github.com/dbt-labs/dbt-semantic-interfaces/blob/main/dbt_semantic_interfaces/protocols/dimension.py
# ====================================


@dataclass
class DimensionValidityParams(dbtClassMixin):
is_start: bool = False
is_end: bool = False


@dataclass
class DimensionTypeParams(dbtClassMixin):
time_granularity: TimeGranularity
validity_params: Optional[DimensionValidityParams] = None


@dataclass
class Dimension(dbtClassMixin):
name: str
type: DimensionType
description: Optional[str] = None
label: Optional[str] = None
is_partition: bool = False
type_params: Optional[DimensionTypeParams] = None
expr: Optional[str] = None
metadata: Optional[SourceFileMetadata] = None

@property
def reference(self) -> DimensionReference:
return DimensionReference(element_name=self.name)

@property
def time_dimension_reference(self) -> Optional[TimeDimensionReference]:
if self.type == DimensionType.TIME:
return TimeDimensionReference(element_name=self.name)
else:
return None

Check warning on line 86 in core/dbt/artifacts/resources/v1/semantic_model.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/artifacts/resources/v1/semantic_model.py#L86

Added line #L86 was not covered by tests

@property
def validity_params(self) -> Optional[DimensionValidityParams]:
if self.type_params:
return self.type_params.validity_params
else:
return None


# ====================================
# Entity objects
# Entity protocols: https://github.com/dbt-labs/dbt-semantic-interfaces/blob/main/dbt_semantic_interfaces/protocols/entity.py
# ====================================


@dataclass
class Entity(dbtClassMixin):
name: str
type: EntityType
description: Optional[str] = None
label: Optional[str] = None
role: Optional[str] = None
expr: Optional[str] = None

@property
def reference(self) -> EntityReference:
return EntityReference(element_name=self.name)

@property
def is_linkable_entity_type(self) -> bool:
return self.type in (EntityType.PRIMARY, EntityType.UNIQUE, EntityType.NATURAL)


# ====================================
# Measure objects
# Measure protocols: https://github.com/dbt-labs/dbt-semantic-interfaces/blob/main/dbt_semantic_interfaces/protocols/measure.py
# ====================================


@dataclass
class MeasureAggregationParameters(dbtClassMixin):
percentile: Optional[float] = None
use_discrete_percentile: bool = False
use_approximate_percentile: bool = False


@dataclass
class NonAdditiveDimension(dbtClassMixin):
name: str
window_choice: AggregationType
window_groupings: List[str]


@dataclass
class Measure(dbtClassMixin):
name: str
agg: AggregationType
description: Optional[str] = None
label: Optional[str] = None
create_metric: bool = False
expr: Optional[str] = None
agg_params: Optional[MeasureAggregationParameters] = None
non_additive_dimension: Optional[NonAdditiveDimension] = None
agg_time_dimension: Optional[str] = None

@property
def reference(self) -> MeasureReference:
return MeasureReference(element_name=self.name)


# ====================================
# SemanticModel final parts
# ====================================


@dataclass
class SemanticModelConfig(BaseConfig):
enabled: bool = True
group: Optional[str] = field(
default=None,
metadata=CompareBehavior.Exclude.meta(),
)
meta: Dict[str, Any] = field(
default_factory=dict,
metadata=MergeBehavior.Update.meta(),
)


@dataclass
class SemanticModel(GraphResource):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This class has a fair number of @property methods, and even a method that isn't @property'd. Felt kind of weird moving them all, however they are expected parts of the dbt-semantic-interfaces protocol specification for SemanticModel. I would like to reduce some of these in the protocol itself, but for now keeping them together in this implementation seems right-ish

model: str
node_relation: Optional[NodeRelation]
description: Optional[str] = None
label: Optional[str] = None
defaults: Optional[Defaults] = None
entities: Sequence[Entity] = field(default_factory=list)
measures: Sequence[Measure] = field(default_factory=list)
dimensions: Sequence[Dimension] = field(default_factory=list)
metadata: Optional[SourceFileMetadata] = None
depends_on: DependsOn = field(default_factory=DependsOn)
refs: List[RefArgs] = field(default_factory=list)
created_at: float = field(default_factory=lambda: time.time())
config: SemanticModelConfig = field(default_factory=SemanticModelConfig)
unrendered_config: Dict[str, Any] = field(default_factory=dict)
primary_entity: Optional[str] = None
group: Optional[str] = None

@property
def entity_references(self) -> List[LinkableElementReference]:
return [entity.reference for entity in self.entities]

@property
def dimension_references(self) -> List[LinkableElementReference]:
return [dimension.reference for dimension in self.dimensions]

@property
def measure_references(self) -> List[MeasureReference]:
return [measure.reference for measure in self.measures]

@property
def has_validity_dimensions(self) -> bool:
return any([dim.validity_params is not None for dim in self.dimensions])

@property
def validity_start_dimension(self) -> Optional[Dimension]:
validity_start_dims = [
dim for dim in self.dimensions if dim.validity_params and dim.validity_params.is_start
]
if not validity_start_dims:
return None
return validity_start_dims[0]

Check warning on line 217 in core/dbt/artifacts/resources/v1/semantic_model.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/artifacts/resources/v1/semantic_model.py#L217

Added line #L217 was not covered by tests

@property
def validity_end_dimension(self) -> Optional[Dimension]:
validity_end_dims = [
dim for dim in self.dimensions if dim.validity_params and dim.validity_params.is_end
]
if not validity_end_dims:
return None
return validity_end_dims[0]

Check warning on line 226 in core/dbt/artifacts/resources/v1/semantic_model.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/artifacts/resources/v1/semantic_model.py#L226

Added line #L226 was not covered by tests

@property
def partitions(self) -> List[Dimension]: # noqa: D
return [dim for dim in self.dimensions or [] if dim.is_partition]

@property
def partition(self) -> Optional[Dimension]:
partitions = self.partitions
if not partitions:
return None
return partitions[0]

Check warning on line 237 in core/dbt/artifacts/resources/v1/semantic_model.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/artifacts/resources/v1/semantic_model.py#L237

Added line #L237 was not covered by tests

@property
def reference(self) -> SemanticModelReference:
return SemanticModelReference(semantic_model_name=self.name)

def checked_agg_time_dimension_for_measure(
self, measure_reference: MeasureReference
) -> TimeDimensionReference:
measure: Optional[Measure] = None
for measure in self.measures:
if measure.reference == measure_reference:
measure = measure

assert (
measure is not None
), f"No measure with name ({measure_reference.element_name}) in semantic_model with name ({self.name})"

default_agg_time_dimension = (
self.defaults.agg_time_dimension if self.defaults is not None else None
)

agg_time_dimension_name = measure.agg_time_dimension or default_agg_time_dimension
assert agg_time_dimension_name is not None, (
f"Aggregation time dimension for measure {measure.name} on semantic model {self.name} is not set! "
"To fix this either specify a default `agg_time_dimension` for the semantic model or define an "
"`agg_time_dimension` on the measure directly."
)
return TimeDimensionReference(element_name=agg_time_dimension_name)

@property
def primary_entity_reference(self) -> Optional[EntityReference]:
return (
EntityReference(element_name=self.primary_entity)
if self.primary_entity is not None
else None
)
19 changes: 5 additions & 14 deletions core/dbt/contracts/graph/model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
from typing import Any, List, Optional, Dict, Union, Type
from typing_extensions import Annotated

from dbt.artifacts.resources import MetricConfig, SavedQueryConfig
from dbt.artifacts.resources import (
MetricConfig,
SavedQueryConfig,
SemanticModelConfig,
)
from dbt_common.contracts.config.base import BaseConfig, MergeBehavior, CompareBehavior
from dbt_common.contracts.config.materialization import OnConfigurationChangeOption
from dbt_common.contracts.config.metadata import Metadata, ShowBehavior
Expand Down Expand Up @@ -49,19 +53,6 @@ class Hook(dbtClassMixin, Replaceable):
index: Optional[int] = None


@dataclass
class SemanticModelConfig(BaseConfig):
enabled: bool = True
group: Optional[str] = field(
default=None,
metadata=CompareBehavior.Exclude.meta(),
)
meta: Dict[str, Any] = field(
default_factory=dict,
metadata=MergeBehavior.Update.meta(),
)


@dataclass
class ExposureConfig(BaseConfig):
enabled: bool = True
Expand Down
Loading
Loading