Skip to content

Commit

Permalink
fix(ingest/dbt): always encode tag urns (datahub-project#10799)
Browse files (browse the repository at this point in the history)
  • Loading branch information
hsheth2 authored and aviv-julienjehannet committed Jul 25, 2024
1 parent 207c729 commit f168925
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 7 deletions.
2 changes: 1 addition & 1 deletion metadata-ingestion/src/datahub/emitter/mce_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ def get_or_add_aspect(mce: MetadataChangeEventClass, default: Aspect) -> Aspect:

def make_global_tag_aspect_with_tag_list(tags: List[str]) -> GlobalTagsClass:
return GlobalTagsClass(
- tags=[TagAssociationClass(f"urn:li:tag:{tag}") for tag in tags]
+ tags=[TagAssociationClass(make_tag_urn(tag)) for tag in tags]
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1483,7 +1483,7 @@ def get_patched_mce(self, mce):
transformed_tag_list = self.get_transformed_tags_by_prefix(
tag_aspect.tags,
mce.proposedSnapshot.urn,
- mce_builder.make_tag_urn(self.config.tag_prefix),
+ tag_prefix=self.config.tag_prefix,
)
tag_aspect.tags = transformed_tag_list

Expand Down Expand Up @@ -1874,16 +1874,19 @@ def get_transformed_tags_by_prefix(
self,
new_tags: List[TagAssociationClass],
entity_urn: str,
- tags_prefix_filter: str,
+ tag_prefix: str,
) -> List[TagAssociationClass]:
tag_set = {new_tag.tag for new_tag in new_tags}

if self.ctx.graph:
existing_tags_class = self.ctx.graph.get_tags(entity_urn)
if existing_tags_class and existing_tags_class.tags:
- for exiting_tag in existing_tags_class.tags:
- if not exiting_tag.tag.startswith(tags_prefix_filter):
- tag_set.add(exiting_tag.tag)
+ for existing_tag in existing_tags_class.tags:
+ if tag_prefix and existing_tag.tag.startswith(
+ mce_builder.make_tag_urn(tag_prefix)
+ ):
+ continue
+ tag_set.add(existing_tag.tag)
return [TagAssociationClass(tag) for tag in sorted(tag_set)]

# This method attempts to read-modify and return the glossary terms of a dataset.
Expand Down
2 changes: 1 addition & 1 deletion metadata-ingestion/tests/unit/test_dbt_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def test_dbt_source_patching_tags():
["new_non_dbt", "dbt:new_dbt"]
)
transformed_tags = source.get_transformed_tags_by_prefix(
- new_tag_aspect.tags, "urn:li:dataset:dummy", "urn:li:tag:dbt:"
+ new_tag_aspect.tags, "urn:li:dataset:dummy", "dbt:"
)
expected_tags = {
"urn:li:tag:new_non_dbt",
Expand Down

0 comments on commit f168925

Please sign in to comment.