From 15e544cffb0d6a29fa445b02e237672ff655c57b Mon Sep 17 00:00:00 2001 From: sagar-salvi-apptware Date: Mon, 1 Jul 2024 20:37:18 +0530 Subject: [PATCH] fix: PR Comments --- .../docs/transformer/dataset_transformer.md | 21 ++++++++++--------- .../src/datahub/ingestion/graph/client.py | 6 ++---- .../tests/unit/test_transform_dataset.py | 14 +++++++------ 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/metadata-ingestion/docs/transformer/dataset_transformer.md b/metadata-ingestion/docs/transformer/dataset_transformer.md index 8505815a93963..773a7e8554832 100644 --- a/metadata-ingestion/docs/transformer/dataset_transformer.md +++ b/metadata-ingestion/docs/transformer/dataset_transformer.md @@ -679,11 +679,11 @@ We can add glossary terms to datasets based on a regex filter.
-The `tags_to_term` transformer is designed to map specific tags to glossary terms within DataHub. It takes a configuration of tags should be translated into corresponding glossaryTerm. This transformer can apply these mappings to any tags found either at column level of dataset or dataset top level. +The `tags_to_term` transformer is designed to map specific tags to glossary terms within DataHub. It takes a configuration of tags that should be translated into corresponding glossary terms. This transformer can apply these mappings to any tags found either at the column level of a dataset or at the dataset top level. When specifying tags in the configuration, use the tag's simple name rather than the full tag URN. -For example, instead of using the tag URN `urn:li:tag:snowflakedb.snowflakeschema.tag_name:tag_value`, you should specify just the tag name `tag_name` in the mapping configuration +For example, instead of using the tag URN `urn:li:tag:snowflakedb.snowflakeschema.tag_name:tag_value`, you should specify just the tag name `tag_name` in the mapping configuration. 
```yaml transformers: @@ -694,12 +694,12 @@ transformers: - "tag_name" ``` -`tags_to_term` can be configured in below different way +The `tags_to_term` transformer can be configured in the following ways: -- Add domains based on tags, however overwrite the domains available for the dataset on DataHub GMS +- Add terms based on tags, but overwrite the terms already available for the dataset on DataHub GMS ```yaml transformers: - - type: "domain_mapping_based_on_tags" + - type: "tags_to_term" config: semantics: OVERWRITE # OVERWRITE is default behaviour tags: @@ -707,15 +707,16 @@ transformers: - "example2" - "example3" ``` -- Add domains based on tags, however keep the domains available for the dataset on DataHub GMS +- Add terms based on tags, but keep the terms already available for the dataset on DataHub GMS ```yaml transformers: - - type: "domain_mapping_based_on_tags" + - type: "tags_to_term" config: semantics: PATCH - domain_mapping: - 'example1': "urn:li:domain:engineering" - 'example2': "urn:li:domain:hr" + tags: + - "example1" + - "example2" + - "example3" ``` ## Pattern Add Dataset Schema Field glossaryTerms diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index ac5cf29dde1e8..7ba412b3e772c 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -1279,7 +1279,7 @@ def create_tag(self, tag_name: str) -> str: return res["createTag"] def remove_tag(self, tag_urn: str, resource_urn: str) -> bool: - graph_query = """ + graph_query = f""" mutation removeTag {{ removeTag( input: {{ @@ -1287,9 +1287,7 @@ def remove_tag(self, tag_urn: str, resource_urn: str) -> bool: resourceUrn: "{resource_urn}" }}) }} - """.format( - tag_urn=tag_urn, resource_urn=resource_urn - ) + """ res = self.execute_graphql(query=graph_query) return res["removeTag"] diff --git a/metadata-ingestion/tests/unit/test_transform_dataset.py 
b/metadata-ingestion/tests/unit/test_transform_dataset.py index 4dbc8061d95eb..4170fb5bf8b67 100644 --- a/metadata-ingestion/tests/unit/test_transform_dataset.py +++ b/metadata-ingestion/tests/unit/test_transform_dataset.py @@ -1898,9 +1898,11 @@ def run_dataset_transformer_pipeline( transformer_type: Type[Union[DatasetTransformer, TagTransformer]], aspect: Optional[builder.Aspect], config: dict, - pipeline_context: PipelineContext = PipelineContext(run_id="transformer_pipe_line"), + pipeline_context: Optional[PipelineContext] = None, use_mce: bool = False, ) -> List[RecordEnvelope]: + if pipeline_context is None: + pipeline_context = PipelineContext(run_id="transformer_pipe_line") transformer: DatasetTransformer = cast( DatasetTransformer, transformer_type.create(config, pipeline_context) ) @@ -3665,8 +3667,8 @@ def test_tags_to_terms_transformation(mock_datahub_graph): def fake_get_tags(entity_urn: str) -> models.GlobalTagsClass: return models.GlobalTagsClass( tags=[ - TagAssociationClass(tag=builder.make_tag_urn("exmaple1")), - TagAssociationClass(tag=builder.make_tag_urn("exmaple2")), + TagAssociationClass(tag=builder.make_tag_urn("example1")), + TagAssociationClass(tag=builder.make_tag_urn("example2")), ] ) @@ -3729,7 +3731,7 @@ def fake_schema_metadata(entity_urn: str) -> models.SchemaMetadataClass: pipeline_context.graph.get_schema_metadata = fake_schema_metadata # type: ignore # Configuring the transformer - config = {"tags": ["exmaple1", "exmaple2"]} + config = {"tags": ["example1", "example2"]} # Running the transformer within a test pipeline output = run_dataset_transformer_pipeline( @@ -3753,8 +3755,8 @@ def fake_schema_metadata(entity_urn: str) -> models.SchemaMetadataClass: assert isinstance(terms_aspect, models.GlossaryTermsClass) assert len(terms_aspect.terms) == len(expected_terms) assert set(term.urn for term in terms_aspect.terms) == { - "urn:li:glossaryTerm:exmaple1", - "urn:li:glossaryTerm:exmaple2", + "urn:li:glossaryTerm:example1", + 
"urn:li:glossaryTerm:example2", }