Skip to content

Commit

Permalink
Allow duplicate refable node names across packages (#7374)
Browse files Browse the repository at this point in the history
  • Loading branch information
MichelleArk authored May 8, 2023
1 parent 5a7b73b commit 0de046d
Show file tree
Hide file tree
Showing 9 changed files with 165 additions and 23 deletions.
7 changes: 7 additions & 0 deletions .changes/unreleased/Features-20230429-155057.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Features
body: Allow duplicate manifest node (models, seeds, analyses, snapshots) names across
packages
time: 2023-04-29T15:50:57.757832-04:00
custom:
Author: MichelleArk
Issue: "7446"
1 change: 1 addition & 0 deletions core/dbt/context/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,7 @@ def resolve(
target_version: Optional[NodeVersion] = None,
) -> RelationProxy:
target_model = self.manifest.resolve_ref(
self.model,
target_name,
target_package,
target_version,
Expand Down
45 changes: 37 additions & 8 deletions core/dbt/contracts/graph/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
DuplicateResourceNameError,
DuplicateMacroInPackageError,
DuplicateMaterializationNameError,
AmbiguousResourceNameRefError,
)
from dbt.helper_types import PathSet
from dbt.events.functions import fire_event
Expand Down Expand Up @@ -152,27 +153,36 @@ class RefableLookup(dbtClassMixin):
_lookup_types: ClassVar[set] = set(NodeType.refable())
_versioned_types: ClassVar[set] = set(NodeType.versioned())

# refables are actually unique, so the Dict[PackageName, UniqueID] will
# only ever have exactly one value, but doing 3 dict lookups instead of 1
# is not a big deal at all and retains consistency
def __init__(self, manifest: "Manifest"):
self.storage: Dict[str, Dict[PackageName, UniqueID]] = {}
self.populate(manifest)
self.populate_public_nodes(manifest)

def get_unique_id(self, key, package: Optional[PackageName], version: Optional[NodeVersion]):
def get_unique_id(
self,
key: str,
package: Optional[PackageName],
version: Optional[NodeVersion],
node: Optional[GraphMemberNode] = None,
):
if version:
key = f"{key}.v{version}"
return find_unique_id_for_package(self.storage, key, package)

unique_ids = self._find_unique_ids_for_package(key, package)
if len(unique_ids) > 1:
raise AmbiguousResourceNameRefError(key, unique_ids, node)
else:
return unique_ids[0] if unique_ids else None

def find(
self,
key,
key: str,
package: Optional[PackageName],
version: Optional[NodeVersion],
manifest: "Manifest",
source_node: Optional[GraphMemberNode] = None,
):
unique_id = self.get_unique_id(key, package, version)
unique_id = self.get_unique_id(key, package, version, source_node)
if unique_id is not None:
node = self.perform_lookup(unique_id, manifest)
# If this is an unpinned ref (no 'version' arg was passed),
Expand Down Expand Up @@ -235,6 +245,22 @@ def perform_lookup(self, unique_id: UniqueID, manifest) -> ManifestOrPublicNode:
)
return node

def _find_unique_ids_for_package(self, key, package: Optional[PackageName]) -> List[str]:
if key not in self.storage:
return []

pkg_dct: Mapping[PackageName, UniqueID] = self.storage[key]

if package is None:
if not pkg_dct:
return []
else:
return list(pkg_dct.values())
elif package in pkg_dct:
return [pkg_dct[package]]
else:
return []


class MetricLookup(dbtClassMixin):
def __init__(self, manifest: "Manifest"):
Expand Down Expand Up @@ -936,6 +962,7 @@ def analysis_lookup(self) -> AnalysisLookup:
# and dbt.parser.manifest._process_refs_for_node
def resolve_ref(
self,
source_node: GraphMemberNode,
target_model_name: str,
target_model_package: Optional[str],
target_model_version: Optional[NodeVersion],
Expand All @@ -948,7 +975,9 @@ def resolve_ref(

candidates = _packages_to_search(current_project, node_package, target_model_package)
for pkg in candidates:
node = self.ref_lookup.find(target_model_name, pkg, target_model_version, self)
node = self.ref_lookup.find(
target_model_name, pkg, target_model_version, self, source_node
)

if node is not None and (
(hasattr(node, "config") and node.config.enabled) or node.is_public_node
Expand Down
17 changes: 17 additions & 0 deletions core/dbt/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1975,6 +1975,23 @@ def get_message(self) -> str:
return msg


class AmbiguousResourceNameRefError(CompilationError):
def __init__(self, duped_name, unique_ids, node=None):
self.duped_name = duped_name
self.unique_ids = unique_ids
self.packages = [unique_id.split(".")[1] for unique_id in unique_ids]
super().__init__(msg=self.get_message(), node=node)

def get_message(self) -> str:
formatted_unique_ids = "'{0}'".format("', '".join(self.unique_ids))
formatted_packages = "'{0}'".format("' or '".join(self.packages))
msg = (
f"When referencing '{self.duped_name}', dbt found nodes in multiple packages: {formatted_unique_ids}"
f"\nTo fix this, use two-argument 'ref', with the package name first: {formatted_packages}"
)
return msg


class AmbiguousCatalogMatchError(CompilationError):
def __init__(self, unique_id: str, match_1, match_2):
self.unique_id = unique_id
Expand Down
10 changes: 3 additions & 7 deletions core/dbt/parser/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1261,30 +1261,23 @@ def _check_resource_uniqueness(
manifest: Manifest,
config: RuntimeConfig,
) -> None:
names_resources: Dict[str, ManifestNode] = {}
alias_resources: Dict[str, ManifestNode] = {}

for resource, node in manifest.nodes.items():
if not node.is_relational:
continue

name = node.name
# the full node name is really defined by the adapter's relation
relation_cls = get_relation_class_by_name(config.credentials.type)
relation = relation_cls.create_from(config=config, node=node)
full_node_name = str(relation)

existing_node = names_resources.get(name)
if existing_node is not None and not existing_node.is_versioned:
raise dbt.exceptions.DuplicateResourceNameError(existing_node, node)

existing_alias = alias_resources.get(full_node_name)
if existing_alias is not None:
raise AmbiguousAliasError(
node_1=existing_alias, node_2=node, duped_name=full_node_name
)

names_resources[name] = node
alias_resources[full_node_name] = node


Expand Down Expand Up @@ -1362,6 +1355,7 @@ def _process_refs_for_exposure(manifest: Manifest, current_project: str, exposur
)

target_model = manifest.resolve_ref(
exposure,
target_model_name,
target_model_package,
target_model_version,
Expand Down Expand Up @@ -1414,6 +1408,7 @@ def _process_refs_for_metric(manifest: Manifest, current_project: str, metric: M
)

target_model = manifest.resolve_ref(
metric,
target_model_name,
target_model_package,
target_model_version,
Expand Down Expand Up @@ -1518,6 +1513,7 @@ def _process_refs_for_node(manifest: Manifest, current_project: str, node: Manif
)

target_model = manifest.resolve_ref(
node,
target_model_name,
target_model_package,
target_model_version,
Expand Down
5 changes: 4 additions & 1 deletion core/dbt/parser/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -918,7 +918,10 @@ def parse_patch(self, block: TargetBlock[NodeTarget], refs: ParserRef) -> None:
assert isinstance(self.yaml.file, SchemaSourceFile)
source_file: SchemaSourceFile = self.yaml.file
if patch.yaml_key in ["models", "seeds", "snapshots"]:
unique_id = self.manifest.ref_lookup.get_unique_id(patch.name, None, None)
unique_id = self.manifest.ref_lookup.get_unique_id(
patch.name, self.project.project_name, None
) or self.manifest.ref_lookup.get_unique_id(patch.name, None, None)

if unique_id:
resource_type = NodeType(unique_id.split(".")[0])
if resource_type.pluralize() != patch.yaml_key:
Expand Down
89 changes: 89 additions & 0 deletions test/unit/test_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
)

from dbt.events.functions import reset_metadata_vars
from dbt.exceptions import AmbiguousResourceNameRefError
from dbt.flags import set_from_args

from dbt.node_types import NodeType
Expand Down Expand Up @@ -1449,11 +1450,75 @@ def _refable_parameter_sets():
version=None,
expected=None,
),
# duplicate node name across package
FindNodeSpec(
nodes=[MockNode("project_a", "my_model"), MockNode("project_b", "my_model")],
sources=[],
package="project_a",
version=None,
expected=("project_a", "my_model"),
),
# duplicate node name across package: root node preferred to package node
FindNodeSpec(
nodes=[MockNode("root", "my_model"), MockNode("project_a", "my_model")],
sources=[],
package=None,
version=None,
expected=("root", "my_model"),
),
FindNodeSpec(
nodes=[MockNode("root", "my_model"), MockNode("project_a", "my_model")],
sources=[],
package="root",
version=None,
expected=("root", "my_model"),
),
FindNodeSpec(
nodes=[MockNode("root", "my_model"), MockNode("project_a", "my_model")],
sources=[],
package="project_a",
version=None,
expected=("project_a", "my_model"),
),
# duplicate node name across package: resolved by version
FindNodeSpec(
nodes=[
MockNode("project_a", "my_model", version="1"),
MockNode("project_b", "my_model", version="2"),
],
sources=[],
package=None,
version="1",
expected=("project_a", "my_model", "1"),
),
]
)
return sets


def _ambiguous_ref_parameter_sets():
sets = [
FindNodeSpec(
nodes=[MockNode("project_a", "my_model"), MockNode("project_b", "my_model")],
sources=[],
package=None,
version=None,
expected=None,
),
FindNodeSpec(
nodes=[
MockNode("project_a", "my_model", version="2", is_latest_version=True),
MockNode("project_b", "my_model", version="1", is_latest_version=True),
],
sources=[],
package=None,
version=None,
expected=None,
),
]
return sets


def id_nodes(arg):
if isinstance(arg, list):
node_names = "__".join(f"{n.package_name}_{n.search_name}" for n in arg)
Expand All @@ -1470,6 +1535,7 @@ def id_nodes(arg):
def test_resolve_ref(nodes, sources, package, version, expected):
manifest = make_manifest(nodes=nodes, sources=sources)
result = manifest.resolve_ref(
source_node=None,
target_model_name="my_model",
target_model_package=package,
target_model_version=version,
Expand All @@ -1491,6 +1557,29 @@ def test_resolve_ref(nodes, sources, package, version, expected):
assert result.package_name == expected_package


@pytest.mark.parametrize(
"nodes,sources,package,version,expected",
_ambiguous_ref_parameter_sets(),
ids=id_nodes,
)
def test_resolve_ref_ambiguous_resource_name_across_packages(
nodes, sources, package, version, expected
):
manifest = make_manifest(
nodes=nodes,
sources=sources,
)
with pytest.raises(AmbiguousResourceNameRefError):
manifest.resolve_ref(
source_node=None,
target_model_name="my_model",
target_model_package=None,
target_model_version=version,
current_project="root",
node_package="root",
)


def _source_parameter_sets():
sets = [
# empties
Expand Down
10 changes: 5 additions & 5 deletions tests/functional/duplicates/test_duplicate_model.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from dbt.exceptions import CompilationError, DuplicateResourceNameError
from dbt.exceptions import CompilationError, AmbiguousAliasError
from dbt.tests.fixtures.project import write_project_files
from dbt.tests.util import run_dbt, get_manifest

Expand Down Expand Up @@ -88,7 +88,7 @@ def test_duplicate_model_disabled_partial_parsing(self, project):
assert len(results) == 1


class TestDuplicateModelEnabledAcrossPackages:
class TestDuplicateModelAliasEnabledAcrossPackages:
@pytest.fixture(scope="class")
def models(self):
return {"table_model.sql": enabled_model_sql}
Expand All @@ -105,10 +105,10 @@ def setUp(self, project_root):
def packages(self):
return {"packages": [{"local": "local_dependency"}]}

def test_duplicate_model_enabled_across_packages(self, project):
def test_duplicate_model_alias_enabled_across_packages(self, project):
run_dbt(["deps"])
message = "dbt found two models with the name"
with pytest.raises(DuplicateResourceNameError) as exc:
message = "dbt found two resources with the database representation"
with pytest.raises(AmbiguousAliasError) as exc:
run_dbt(["run"])
assert message in str(exc.value)

Expand Down
4 changes: 2 additions & 2 deletions tests/functional/multi_project/test_publication.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,8 @@ def test_pub_artifacts(self, project):
publication = PublicationArtifact.from_dict(publication_dict)
assert publication.dependencies == ["marketing"]

# target_model_name, target_model_package, target_model_version, current_project, node_package
resolved_node = manifest.resolve_ref("fct_one", "marketing", None, "test", "test")
# source_node, target_model_name, target_model_package, target_model_version, current_project, node_package
resolved_node = manifest.resolve_ref(None, "fct_one", "marketing", None, "test", "test")
assert resolved_node
assert isinstance(resolved_node, PublicModel)
assert resolved_node.unique_id == "model.marketing.fct_one"
Expand Down

0 comments on commit 0de046d

Please sign in to comment.