From 3701931363efeaa952e23de4da87c90a7cff720d Mon Sep 17 00:00:00 2001 From: Kevin Hu <6051736+kevinhu@users.noreply.github.com> Date: Tue, 3 Aug 2021 15:13:49 -0400 Subject: [PATCH] feat(ingest): extract dbt versions into custom properties (#3021) --- .../src/datahub/ingestion/source/dbt.py | 40 +++++++- .../dbt/dbt_with_schemas_mces_golden.json | 96 +++++++++++++++---- .../dbt/dbt_without_schemas_mces_golden.json | 96 +++++++++++++++---- 3 files changed, 195 insertions(+), 37 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt.py b/metadata-ingestion/src/datahub/ingestion/source/dbt.py index 3971bc0b9d63a3..5a55d6bd518370 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt.py @@ -1,7 +1,7 @@ import json import logging from dataclasses import dataclass, field -from typing import Any, Dict, Iterable, List, Optional +from typing import Any, Dict, Iterable, List, Optional, Tuple import dateutil.parser @@ -234,7 +234,7 @@ def loadManifestAndCatalog( environment: str, node_type_pattern: AllowDenyPattern, report: SourceReport, -) -> List[DBTNode]: +) -> Tuple[List[DBTNode], Optional[str], Optional[str], Optional[str], Optional[str]]: with open(manifest_path, "r") as manifest: dbt_manifest_json = json.load(manifest) @@ -248,6 +248,12 @@ def loadManifestAndCatalog( else: sources_results = {} + manifest_schema = dbt_manifest_json.get("metadata", {}).get("dbt_schema_version") + manifest_version = dbt_manifest_json.get("metadata", {}).get("dbt_version") + + catalog_schema = dbt_catalog_json.get("metadata", {}).get("dbt_schema_version") + catalog_version = dbt_catalog_json.get("metadata", {}).get("dbt_version") + manifest_nodes = dbt_manifest_json["nodes"] manifest_sources = dbt_manifest_json["sources"] @@ -269,7 +275,7 @@ def loadManifestAndCatalog( report, ) - return nodes + return nodes, manifest_schema, manifest_version, catalog_schema, catalog_version def get_urn_from_dbtNode( @@ -447,7 +453,13 @@ def __init__(self, config: DBTConfig, ctx: PipelineContext, platform: str): self.report = SourceReport() def get_workunits(self) -> Iterable[MetadataWorkUnit]: - nodes = loadManifestAndCatalog( + ( + nodes, + manifest_schema, + manifest_version, + catalog_schema, + catalog_version, + ) = loadManifestAndCatalog( self.config.manifest_path, self.config.catalog_path, self.config.sources_path, @@ -458,6 +470,19 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]: self.report, ) + additional_custom_props = { + "manifest_schema": manifest_schema, + "manifest_version": manifest_version, + "catalog_schema": catalog_schema, + "catalog_version": catalog_version, + } + + additional_custom_props_filtered = { + key: value + for key, value in additional_custom_props.items() + if value is not None + } + for node in nodes: dataset_snapshot = DatasetSnapshot( @@ -474,9 +499,14 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]: elif node.description: description = node.description + custom_props = { + **get_custom_properties(node), + **additional_custom_props_filtered, + } + dbt_properties = DatasetPropertiesClass( description=description, - customProperties=get_custom_properties(node), + customProperties=custom_props, tags=[], ) dataset_snapshot.aspects.append(dbt_properties) diff --git a/metadata-ingestion/tests/integration/dbt/dbt_with_schemas_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_with_schemas_mces_golden.json index 92c28ee18e71fd..649d5c6c14a13a 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_with_schemas_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_with_schemas_mces_golden.json @@ -10,7 +10,11 @@ "customProperties": { "node_type": "model", "materialization": "ephemeral", - "dbt_file_path": "models/transform/customer_details.sql" + "dbt_file_path": "models/transform/customer_details.sql", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null, @@ -102,7 +106,11 @@ "node_type": "model", "materialization": "table", "dbt_file_path": "models/billing/monthly_billing_with_cust.sql", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null, @@ -246,7 +254,11 @@ "node_type": "model", "materialization": "view", "dbt_file_path": "models/base/payments_base.sql", - "catalog_type": "VIEW" + "catalog_type": "VIEW", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null, @@ -465,7 +477,11 @@ "node_type": "model", "materialization": "table", "dbt_file_path": "models/transform/payments_by_customer_by_month.sql", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null, @@ -587,7 +603,11 @@ "some_other_property": "test 1", "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": "postgres comment: Actors table \u2013 from postgres\n\ndbt model description: description for actor table from dbt", @@ -711,7 +731,11 @@ "customProperties": { "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": "a user's address", @@ -895,7 +919,11 @@ "customProperties": { "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": "a user's category", @@ -1004,7 +1032,11 @@ "customProperties": { "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null, @@ -1131,7 +1163,11 @@ "some_other_property": "test 2", "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null, @@ -1240,7 +1276,11 @@ "customProperties": { "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": "description for customer table from dbt", @@ -1454,7 +1494,11 @@ "customProperties": { "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null, @@ -1612,7 +1656,11 @@ "some_other_property": "test 3", "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null, @@ -1766,7 +1814,11 @@ "customProperties": { "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null, @@ -1920,7 +1972,11 @@ "customProperties": { "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null, @@ -2074,7 +2130,11 @@ "customProperties": { "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": "a payment", @@ -2228,7 +2288,11 @@ "customProperties": { "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null, diff --git a/metadata-ingestion/tests/integration/dbt/dbt_without_schemas_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_without_schemas_mces_golden.json index 88e9a4008b1ef8..10416225d16712 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_without_schemas_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_without_schemas_mces_golden.json @@ -10,7 +10,11 @@ "customProperties": { "node_type": "model", "materialization": "ephemeral", - "dbt_file_path": "models/transform/customer_details.sql" + "dbt_file_path": "models/transform/customer_details.sql", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null, @@ -73,7 +77,11 @@ "node_type": "model", "materialization": "table", "dbt_file_path": "models/billing/monthly_billing_with_cust.sql", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null, @@ -127,7 +135,11 @@ "node_type": "model", "materialization": "view", "dbt_file_path": "models/base/payments_base.sql", - "catalog_type": "VIEW" + "catalog_type": "VIEW", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null, @@ -226,7 +238,11 @@ "node_type": "model", "materialization": "table", "dbt_file_path": "models/transform/payments_by_customer_by_month.sql", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null, @@ -273,7 +289,11 @@ "some_other_property": "test 1", "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": "postgres comment: Actors table \u2013 from postgres\n\ndbt model description: description for actor table from dbt", @@ -307,7 +327,11 @@ "customProperties": { "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": "a user's address", @@ -341,7 +365,11 @@ "customProperties": { "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": "a user's category", @@ -375,7 +403,11 @@ "customProperties": { "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null, @@ -412,7 +444,11 @@ "some_other_property": "test 2", "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null, @@ -446,7 +482,11 @@ "customProperties": { "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": "description for customer table from dbt", @@ -480,7 +520,11 @@ "customProperties": { "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null, @@ -518,7 +562,11 @@ "some_other_property": "test 3", "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null, @@ -552,7 +600,11 @@ "customProperties": { "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null, @@ -586,7 +638,11 @@ "customProperties": { "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null, @@ -620,7 +676,11 @@ "customProperties": { "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": "a payment", @@ -654,7 +714,11 @@ "customProperties": { "node_type": "source", "dbt_file_path": "models/base.yml", - "catalog_type": "BASE TABLE" + "catalog_type": "BASE TABLE", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", + "manifest_version": "0.19.1", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "0.19.1" }, "externalUrl": null, "description": null,