
CT-2691 Fix the populating of a Metric's depends_on property #8015

Merged: 18 commits (changes shown from 11 commits), Jul 12, 2023
2 changes: 2 additions & 0 deletions core/dbt/compilation.py
@@ -173,6 +173,8 @@ def link_node(self, node: GraphMemberNode, manifest: Manifest):
self.dependency(node.unique_id, (manifest.sources[dependency].unique_id))
elif dependency in manifest.metrics:
self.dependency(node.unique_id, (manifest.metrics[dependency].unique_id))
elif dependency in manifest.semantic_models:
self.dependency(node.unique_id, (manifest.semantic_models[dependency].unique_id))
else:
raise GraphDependencyNotFoundError(node, dependency)
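
For context: link_node resolves each string dependency against the manifest's collections and registers a graph edge, and with this change semantic models become a valid target instead of falling through to GraphDependencyNotFoundError. A minimal, self-contained sketch of that dispatch pattern (simplified stand-ins, not dbt's actual Linker/Manifest classes):

from typing import Dict, List, Tuple

def link_dependency(
    node_id: str,
    dependency: str,
    collections: Dict[str, Dict[str, str]],  # e.g. {"metrics": {...}, "semantic_models": {...}}
    edges: List[Tuple[str, str]],
) -> None:
    # Walk the manifest-like collections in order; the first one that
    # contains the dependency wins, mirroring the elif chain above.
    for collection in collections.values():
        if dependency in collection:
            edges.append((node_id, collection[dependency]))
            return
    raise KeyError(f"{dependency} not found while linking {node_id}")

edges: List[Tuple[str, str]] = []
collections = {
    "metrics": {},
    # keyed by unique_id, as manifest.semantic_models is
    "semantic_models": {"semantic_model.test.semantic_people": "semantic_model.test.semantic_people"},
}
link_dependency("metric.test.number_of_people", "semantic_model.test.semantic_people", collections, edges)
assert edges == [("metric.test.number_of_people", "semantic_model.test.semantic_people")]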

75 changes: 75 additions & 0 deletions core/dbt/contracts/graph/manifest.py
@@ -1,9 +1,11 @@
import enum
from collections import defaultdict
from dataclasses import dataclass, field
from itertools import chain, islice
from mashumaro.mixins.msgpack import DataClassMessagePackMixin
from multiprocessing.synchronize import Lock
from typing import (
DefaultDict,
Dict,
List,
Optional,
@@ -297,6 +299,49 @@ def perform_lookup(self, unique_id: UniqueID, manifest: "Manifest") -> Metric:
return manifest.metrics[unique_id]


class SemanticModelByMeasureLookup(dbtClassMixin):
"""Lookup utility for finding SemanticModel by measure

This is possible because measure names are supposed to be unique across
the semantic models in a manifest.
"""

def __init__(self, manifest: "Manifest"):
self.storage: DefaultDict[str, Dict[PackageName, UniqueID]] = defaultdict(dict)
self.populate(manifest)

def get_unique_id(self, search_name: str, package: Optional[PackageName]):
return find_unique_id_for_package(self.storage, search_name, package)

def find(
self, search_name: str, package: Optional[PackageName], manifest: "Manifest"
) -> Optional[SemanticModel]:
"""Tries to find a SemanticModel based on a measure name"""
unique_id = self.get_unique_id(search_name, package)
if unique_id is not None:
return self.perform_lookup(unique_id, manifest)
return None

def add(self, semantic_model: SemanticModel):
"""Sets all measures for a SemanticModel as paths to the SemanticModel's `unique_id`"""
for measure in semantic_model.measures:
self.storage[measure.name][semantic_model.package_name] = semantic_model.unique_id

def populate(self, manifest: "Manifest"):
"""Populate storage with all the measure + package paths to the Manifest's SemanticModels"""
for semantic_model in manifest.semantic_models.values():
self.add(semantic_model=semantic_model)

def perform_lookup(self, unique_id: UniqueID, manifest: "Manifest") -> SemanticModel:
"""Tries to get a SemanticModel from the Manifest"""
semantic_model = manifest.semantic_models.get(unique_id)
if semantic_model is None:
raise dbt.exceptions.DbtInternalError(
f"Semantic model `{unique_id}` found in cache but not found in manifest"
)
return semantic_model
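
To illustrate the storage shape this class maintains: a two-level mapping from measure name to package name to the owning semantic model's unique_id. A standalone sketch using plain dicts rather than the dbt classes (the real find_unique_id_for_package helper also handles package precedence, which is glossed over here):

from collections import defaultdict
from typing import Dict, List, Optional

storage: Dict[str, Dict[str, str]] = defaultdict(dict)  # measure -> {package -> unique_id}

def add(package: str, unique_id: str, measure_names: List[str]) -> None:
    # Every measure of a semantic model points back at that model's unique_id.
    for name in measure_names:
        storage[name][package] = unique_id

def get_unique_id(measure: str, package: Optional[str]) -> Optional[str]:
    by_package = storage.get(measure, {})
    if package is not None:
        return by_package.get(package)
    # No package restriction: any match will do, relying on measure names
    # being unique across the semantic models in a manifest.
    return next(iter(by_package.values()), None)

add("test", "semantic_model.test.semantic_people", ["years_tenure", "people"])
assert get_unique_id("people", None) == "semantic_model.test.semantic_people"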


# This handles both models/seeds/snapshots and sources/metrics/exposures
class DisabledLookup(dbtClassMixin):
def __init__(self, manifest: "Manifest"):
@@ -710,6 +755,9 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
_metric_lookup: Optional[MetricLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
_semantic_model_by_measure_lookup: Optional[SemanticModelByMeasureLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
_disabled_lookup: Optional[DisabledLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
@@ -960,6 +1008,13 @@ def metric_lookup(self) -> MetricLookup:
self._metric_lookup = MetricLookup(self)
return self._metric_lookup

@property
def semantic_model_by_measure_lookup(self) -> SemanticModelByMeasureLookup:
"""Gets (and creates if necessary) the lookup utility for getting SemanticModels by measures"""
if self._semantic_model_by_measure_lookup is None:
Member:

Nit

Suggested change:
if self._semantic_model_by_measure_lookup is None:
if not self._semantic_model_by_measure_lookup:

Contributor Author:

The checked-in code follows the paradigm of all the other *_lookup properties. I'm also biased because if we're specifically checking for None, then we should specifically check against None. Python can have many weird false-y states. It'd be weird if any cropped up here, but the check against None is intentional.

self._semantic_model_by_measure_lookup = SemanticModelByMeasureLookup(self)
return self._semantic_model_by_measure_lookup
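
On the review exchange above: a standalone illustration (not part of this diff) of the kind of false-y surprise an explicit `is None` check avoids. An object is false-y without being None as soon as it defines, say, __len__ returning 0:

class EmptyishLookup:
    def __init__(self):
        self.storage = {}

    def __len__(self):
        return len(self.storage)

lookup = EmptyishLookup()
assert not lookup            # truthiness: False, because len(lookup) == 0
assert lookup is not None    # identity: still a real, cached object
# `if not self._lookup:` would treat a cached-but-empty lookup as missing and
# rebuild it; `if self._lookup is None:` only rebuilds when nothing is cached.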

def rebuild_ref_lookup(self):
self._ref_lookup = RefableLookup(self)

@@ -1087,6 +1142,25 @@ def resolve_metric(
return Disabled(disabled[0])
return None

def resolve_semantic_model_for_measure(
self,
target_measure_name: str,
current_project: str,
node_package: str,
target_package: Optional[str] = None,
) -> Optional[SemanticModel]:
"""Tries to find the SemanticModel that a measure belongs to"""
candidates = _packages_to_search(current_project, node_package, target_package)

for pkg in candidates:
semantic_model = self.semantic_model_by_measure_lookup.find(
target_measure_name, pkg, self
)
if semantic_model is not None:
return semantic_model

return None

# Called by DocsRuntimeContext.doc
def resolve_doc(
self,
@@ -1328,6 +1402,7 @@ def __reduce_ex__(self, protocol):
self._source_lookup,
self._ref_lookup,
self._metric_lookup,
self._semantic_model_by_measure_lookup,
self._disabled_lookup,
self._analysis_lookup,
)
5 changes: 5 additions & 0 deletions core/dbt/contracts/graph/model_config.py
@@ -386,6 +386,11 @@ def replace(self, **kwargs):
return self.from_dict(dct)


@dataclass
class SemanticModelConfig(BaseConfig):
enabled: bool = True


@dataclass
class MetricConfig(BaseConfig):
enabled: bool = True
2 changes: 2 additions & 0 deletions core/dbt/contracts/graph/nodes.py
@@ -65,6 +65,7 @@
ExposureConfig,
EmptySnapshotConfig,
SnapshotConfig,
SemanticModelConfig,
)


@@ -1482,6 +1483,7 @@ class SemanticModel(GraphNode):
depends_on: DependsOn = field(default_factory=DependsOn)
refs: List[RefArgs] = field(default_factory=list)
created_at: float = field(default_factory=lambda: time.time())
config: SemanticModelConfig = field(default_factory=SemanticModelConfig)

@property
def entity_references(self) -> List[LinkableElementReference]:
15 changes: 14 additions & 1 deletion core/dbt/parser/manifest.py
@@ -1447,7 +1447,7 @@ def _process_metric_node(
current_project: str,
metric: Metric,
) -> None:
"""Sets a metric's input_measures"""
"""Sets a metric's `input_measures` and `depends_on` properties"""

# This ensures that if this metric's input_measures have already been set
# we skip the work. This could happen either due to recursion or if multiple
@@ -1461,6 +1461,18 @@
metric.type_params.measure is not None
), f"{metric} should have a measure defined, but it does not."
metric.type_params.input_measures.append(metric.type_params.measure)
target_semantic_model = manifest.resolve_semantic_model_for_measure(
target_measure_name=metric.type_params.measure.name,
current_project=current_project,
node_package=metric.package_name,
)
if target_semantic_model is None:
raise dbt.exceptions.ParsingError(
f"A semantic model having a measure `{metric.type_params.measure.name}` does not exist but was referenced.",
node=metric,
)

metric.depends_on.add_node(target_semantic_model.unique_id)

elif metric.type is MetricType.DERIVED or metric.type is MetricType.RATIO:
input_metrics = metric.input_metrics
@@ -1495,6 +1507,7 @@ def _process_metric_node(
manifest=manifest, current_project=current_project, metric=target_metric
)
metric.type_params.input_measures.extend(target_metric.type_params.input_measures)
metric.depends_on.add_node(target_metric.unique_id)
else:
assert_values_exhausted(metric.type)
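
A condensed, runnable sketch of the flow this hunk completes (hypothetical stand-in types, not dbt's Metric/Manifest): simple metrics record their measure and depend on the semantic model that owns it, while derived and ratio metrics recurse into their input metrics, inherit those metrics' input_measures, and depend on the metrics themselves.

from dataclasses import dataclass, field
from typing import Dict, List, Set

@dataclass
class _Metric:
    unique_id: str
    type: str                                                   # "simple" | "derived"
    measure: str = ""                                           # for simple metrics
    input_metric_ids: List[str] = field(default_factory=list)  # for derived/ratio metrics
    input_measures: List[str] = field(default_factory=list)
    depends_on: Set[str] = field(default_factory=set)

def process_metric(metric: _Metric, metrics: Dict[str, _Metric], measure_owner: Dict[str, str]) -> None:
    if metric.input_measures:
        return  # already processed (mirrors the early-exit guard in the real function)
    if metric.type == "simple":
        metric.input_measures.append(metric.measure)
        metric.depends_on.add(measure_owner[metric.measure])
    else:  # derived / ratio
        for target_id in metric.input_metric_ids:
            target = metrics[target_id]
            process_metric(target, metrics, measure_owner)
            metric.input_measures.extend(target.input_measures)
            metric.depends_on.add(target.unique_id)

people = _Metric("metric.test.number_of_people", "simple", measure="people")
ratio = _Metric("metric.test.people_ratio", "derived", input_metric_ids=[people.unique_id])
metrics = {m.unique_id: m for m in (people, ratio)}
process_metric(ratio, metrics, {"people": "semantic_model.test.semantic_people"})
assert people.depends_on == {"semantic_model.test.semantic_people"}
assert ratio.depends_on == {people.unique_id}
assert ratio.input_measures == ["people"]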

35 changes: 33 additions & 2 deletions tests/functional/access/test_access.py
@@ -112,7 +112,7 @@
group: analytics
- name: people_model
description: "some people"
access: private
access: public
group: analytics
"""

@@ -124,6 +124,31 @@
select 1 as id, 'Callum' as first_name, 'McCann' as last_name, 'emerald' as favorite_color, true as loves_dbt, 0 as tenure, current_timestamp as created_at
"""

people_semantic_model_yml = """
semantic_models:
- name: semantic_people
model: ref('people_model')
dimensions:
- name: favorite_color
type: categorical
- name: created_at
type: TIME
type_params:
time_granularity: day
measures:
- name: years_tenure
agg: SUM
expr: tenure
- name: people
agg: count
expr: id
entities:
- name: id
type: primary
defaults:
agg_time_dimension: created_at
"""

people_metric_yml = """
metrics:

@@ -203,6 +228,10 @@
group: package
"""

metricflow_time_spine_sql = """
SELECT to_date('02/20/2023', 'mm/dd/yyyy') as date_day
"""


class TestAccess:
@pytest.fixture(scope="class")
@@ -278,10 +307,12 @@ def test_access_attribute(self, project):
write_file(v5_schema_yml, project.project_root, "models", "schema.yml")
rm_file(project.project_root, "models", "simple_exposure.yml")
write_file(people_model_sql, "models", "people_model.sql")
write_file(people_semantic_model_yml, "models", "people_semantic_model.yml")
write_file(people_metric_yml, "models", "people_metric.yml")
write_file(metricflow_time_spine_sql, "models", "metricflow_time_spine.sql")
# Should succeed
manifest = run_dbt(["parse"])
assert len(manifest.nodes) == 4
assert len(manifest.nodes) == 5
manifest = get_manifest(project.project_root)
metric_id = "metric.test.number_of_people"
assert manifest.metrics[metric_id].group == "analytics"
37 changes: 34 additions & 3 deletions tests/functional/artifacts/test_previous_version_state.py
@@ -113,6 +113,10 @@
select 9 as id
"""

metricflow_time_spine_sql = """
SELECT to_date('02/20/2023', 'mm/dd/yyyy') as date_day
"""

# Use old attribute names (v1.0-1.2) to test forward/backward compatibility with the rename in v1.3
models__schema_yml = """
version: 2
Expand All @@ -127,6 +131,32 @@
tests:
- not_null

semantic_models:
- name: semantic_people
model: ref('my_model')
dimensions:
- name: favorite_color
type: categorical
- name: created_at
type: TIME
type_params:
time_granularity: day
measures:
- name: years_tenure
agg: SUM
expr: tenure
- name: people
agg: count
expr: id
- name: customers
agg: count
expr: id
entities:
- name: id
type: primary
defaults:
agg_time_dimension: created_at

metrics:
- name: my_metric
label: Count records
@@ -208,6 +238,7 @@ def models(self):
"schema.yml": models__schema_yml,
"somedoc.md": docs__somedoc_md,
"disabled_model.sql": models__disabled_model_sql,
"metricflow_time_spine.sql": metricflow_time_spine_sql,
}

@pytest.fixture(scope="class")
@@ -250,10 +281,10 @@ def test_project(self, project):
# This is mainly used to test changes to the test project in isolation from
# the other noise.
results = run_dbt(["run"])
assert len(results) == 1
assert len(results) == 2
manifest = get_manifest(project.project_root)
# model, snapshot, seed, singular test, generic test, analysis
assert len(manifest.nodes) == 7
assert len(manifest.nodes) == 8
assert len(manifest.sources) == 1
assert len(manifest.exposures) == 1
assert len(manifest.metrics) == 1
@@ -297,7 +328,7 @@ def compare_previous_state(
]
if expect_pass:
results = run_dbt(cli_args, expect_pass=expect_pass)
assert len(results) == 0
assert len(results) == 1
else:
with pytest.raises(IncompatibleSchemaError):
run_dbt(cli_args, expect_pass=expect_pass)
24 changes: 24 additions & 0 deletions tests/functional/exposures/fixtures.py
@@ -7,6 +7,11 @@
"""


metricflow_time_spine_sql = """
SELECT to_date('02/20/2023', 'mm/dd/yyyy') as date_day
"""


source_schema_yml = """version: 2

sources:
@@ -15,6 +20,25 @@
- name: test_table
"""


semantic_models_schema_yml = """version: 2

semantic_models:
- name: semantic_model
model: ref('model')
measures:
- name: distinct_metrics
agg: count_distinct
expr: id
entities:
- name: model
type: primary
expr: id
defaults:
agg_time_dimension: created_at
"""


metrics_schema_yml = """version: 2

metrics:
4 changes: 4 additions & 0 deletions tests/functional/exposures/test_exposure_configs.py
@@ -12,6 +12,8 @@
enabled_yaml_level_exposure_yml,
invalid_config_exposure_yml,
source_schema_yml,
metricflow_time_spine_sql,
semantic_models_schema_yml,
metrics_schema_yml,
)

@@ -30,9 +32,11 @@ class TestExposureEnabledConfigProjectLevel(ExposureConfigTests):
def models(self):
return {
"model.sql": models_sql,
"metricflow_time_spine.sql": metricflow_time_spine_sql,
"second_model.sql": second_model_sql,
"exposure.yml": simple_exposure_yml,
"schema.yml": source_schema_yml,
"semantic_models.yml": semantic_models_schema_yml,
"metrics.yml": metrics_schema_yml,
}
