Skip to content

Commit

Permalink
feat(ingest): extract dbt versions into custom properties (#3021)
Browse files: browse the repository at this point in the history.
  • Loading branch information
kevinhu authored Aug 3, 2021
1 parent ea7952c commit 3701931
Show file tree
Hide file tree
Showing 3 changed files with 195 additions and 37 deletions.
40 changes: 35 additions & 5 deletions metadata-ingestion/src/datahub/ingestion/source/dbt.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
import logging
from dataclasses import dataclass, field
from typing import Any, Dict, Iterable, List, Optional
from typing import Any, Dict, Iterable, List, Optional, Tuple

import dateutil.parser

Expand Down Expand Up @@ -234,7 +234,7 @@ def loadManifestAndCatalog(
environment: str,
node_type_pattern: AllowDenyPattern,
report: SourceReport,
) -> List[DBTNode]:
) -> Tuple[List[DBTNode], Optional[str], Optional[str], Optional[str], Optional[str]]:
with open(manifest_path, "r") as manifest:
dbt_manifest_json = json.load(manifest)

Expand All @@ -248,6 +248,12 @@ def loadManifestAndCatalog(
else:
sources_results = {}

manifest_schema = dbt_manifest_json.get("metadata", {}).get("dbt_schema_version")
manifest_version = dbt_manifest_json.get("metadata", {}).get("dbt_version")

catalog_schema = dbt_catalog_json.get("metadata", {}).get("dbt_schema_version")
catalog_version = dbt_catalog_json.get("metadata", {}).get("dbt_version")

manifest_nodes = dbt_manifest_json["nodes"]
manifest_sources = dbt_manifest_json["sources"]

Expand All @@ -269,7 +275,7 @@ def loadManifestAndCatalog(
report,
)

return nodes
return nodes, manifest_schema, manifest_version, catalog_schema, catalog_version


def get_urn_from_dbtNode(
Expand Down Expand Up @@ -447,7 +453,13 @@ def __init__(self, config: DBTConfig, ctx: PipelineContext, platform: str):
self.report = SourceReport()

def get_workunits(self) -> Iterable[MetadataWorkUnit]:
nodes = loadManifestAndCatalog(
(
nodes,
manifest_schema,
manifest_version,
catalog_schema,
catalog_version,
) = loadManifestAndCatalog(
self.config.manifest_path,
self.config.catalog_path,
self.config.sources_path,
Expand All @@ -458,6 +470,19 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]:
self.report,
)

additional_custom_props = {
"manifest_schema": manifest_schema,
"manifest_version": manifest_version,
"catalog_schema": catalog_schema,
"catalog_version": catalog_version,
}

additional_custom_props_filtered = {
key: value
for key, value in additional_custom_props.items()
if value is not None
}

for node in nodes:

dataset_snapshot = DatasetSnapshot(
Expand All @@ -474,9 +499,14 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]:
elif node.description:
description = node.description

custom_props = {
**get_custom_properties(node),
**additional_custom_props_filtered,
}

dbt_properties = DatasetPropertiesClass(
description=description,
customProperties=get_custom_properties(node),
customProperties=custom_props,
tags=[],
)
dataset_snapshot.aspects.append(dbt_properties)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
"customProperties": {
"node_type": "model",
"materialization": "ephemeral",
"dbt_file_path": "models/transform/customer_details.sql"
"dbt_file_path": "models/transform/customer_details.sql",
"manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json",
"manifest_version": "0.19.1",
"catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json",
"catalog_version": "0.19.1"
},
"externalUrl": null,
"description": null,
Expand Down Expand Up @@ -102,7 +106,11 @@
"node_type": "model",
"materialization": "table",
"dbt_file_path": "models/billing/monthly_billing_with_cust.sql",
"catalog_type": "BASE TABLE"
"catalog_type": "BASE TABLE",
"manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json",
"manifest_version": "0.19.1",
"catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json",
"catalog_version": "0.19.1"
},
"externalUrl": null,
"description": null,
Expand Down Expand Up @@ -246,7 +254,11 @@
"node_type": "model",
"materialization": "view",
"dbt_file_path": "models/base/payments_base.sql",
"catalog_type": "VIEW"
"catalog_type": "VIEW",
"manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json",
"manifest_version": "0.19.1",
"catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json",
"catalog_version": "0.19.1"
},
"externalUrl": null,
"description": null,
Expand Down Expand Up @@ -465,7 +477,11 @@
"node_type": "model",
"materialization": "table",
"dbt_file_path": "models/transform/payments_by_customer_by_month.sql",
"catalog_type": "BASE TABLE"
"catalog_type": "BASE TABLE",
"manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json",
"manifest_version": "0.19.1",
"catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json",
"catalog_version": "0.19.1"
},
"externalUrl": null,
"description": null,
Expand Down Expand Up @@ -587,7 +603,11 @@
"some_other_property": "test 1",
"node_type": "source",
"dbt_file_path": "models/base.yml",
"catalog_type": "BASE TABLE"
"catalog_type": "BASE TABLE",
"manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json",
"manifest_version": "0.19.1",
"catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json",
"catalog_version": "0.19.1"
},
"externalUrl": null,
"description": "postgres comment: Actors table \u2013 from postgres\n\ndbt model description: description for actor table from dbt",
Expand Down Expand Up @@ -711,7 +731,11 @@
"customProperties": {
"node_type": "source",
"dbt_file_path": "models/base.yml",
"catalog_type": "BASE TABLE"
"catalog_type": "BASE TABLE",
"manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json",
"manifest_version": "0.19.1",
"catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json",
"catalog_version": "0.19.1"
},
"externalUrl": null,
"description": "a user's address",
Expand Down Expand Up @@ -895,7 +919,11 @@
"customProperties": {
"node_type": "source",
"dbt_file_path": "models/base.yml",
"catalog_type": "BASE TABLE"
"catalog_type": "BASE TABLE",
"manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json",
"manifest_version": "0.19.1",
"catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json",
"catalog_version": "0.19.1"
},
"externalUrl": null,
"description": "a user's category",
Expand Down Expand Up @@ -1004,7 +1032,11 @@
"customProperties": {
"node_type": "source",
"dbt_file_path": "models/base.yml",
"catalog_type": "BASE TABLE"
"catalog_type": "BASE TABLE",
"manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json",
"manifest_version": "0.19.1",
"catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json",
"catalog_version": "0.19.1"
},
"externalUrl": null,
"description": null,
Expand Down Expand Up @@ -1131,7 +1163,11 @@
"some_other_property": "test 2",
"node_type": "source",
"dbt_file_path": "models/base.yml",
"catalog_type": "BASE TABLE"
"catalog_type": "BASE TABLE",
"manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json",
"manifest_version": "0.19.1",
"catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json",
"catalog_version": "0.19.1"
},
"externalUrl": null,
"description": null,
Expand Down Expand Up @@ -1240,7 +1276,11 @@
"customProperties": {
"node_type": "source",
"dbt_file_path": "models/base.yml",
"catalog_type": "BASE TABLE"
"catalog_type": "BASE TABLE",
"manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json",
"manifest_version": "0.19.1",
"catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json",
"catalog_version": "0.19.1"
},
"externalUrl": null,
"description": "description for customer table from dbt",
Expand Down Expand Up @@ -1454,7 +1494,11 @@
"customProperties": {
"node_type": "source",
"dbt_file_path": "models/base.yml",
"catalog_type": "BASE TABLE"
"catalog_type": "BASE TABLE",
"manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json",
"manifest_version": "0.19.1",
"catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json",
"catalog_version": "0.19.1"
},
"externalUrl": null,
"description": null,
Expand Down Expand Up @@ -1612,7 +1656,11 @@
"some_other_property": "test 3",
"node_type": "source",
"dbt_file_path": "models/base.yml",
"catalog_type": "BASE TABLE"
"catalog_type": "BASE TABLE",
"manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json",
"manifest_version": "0.19.1",
"catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json",
"catalog_version": "0.19.1"
},
"externalUrl": null,
"description": null,
Expand Down Expand Up @@ -1766,7 +1814,11 @@
"customProperties": {
"node_type": "source",
"dbt_file_path": "models/base.yml",
"catalog_type": "BASE TABLE"
"catalog_type": "BASE TABLE",
"manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json",
"manifest_version": "0.19.1",
"catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json",
"catalog_version": "0.19.1"
},
"externalUrl": null,
"description": null,
Expand Down Expand Up @@ -1920,7 +1972,11 @@
"customProperties": {
"node_type": "source",
"dbt_file_path": "models/base.yml",
"catalog_type": "BASE TABLE"
"catalog_type": "BASE TABLE",
"manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json",
"manifest_version": "0.19.1",
"catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json",
"catalog_version": "0.19.1"
},
"externalUrl": null,
"description": null,
Expand Down Expand Up @@ -2074,7 +2130,11 @@
"customProperties": {
"node_type": "source",
"dbt_file_path": "models/base.yml",
"catalog_type": "BASE TABLE"
"catalog_type": "BASE TABLE",
"manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json",
"manifest_version": "0.19.1",
"catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json",
"catalog_version": "0.19.1"
},
"externalUrl": null,
"description": "a payment",
Expand Down Expand Up @@ -2228,7 +2288,11 @@
"customProperties": {
"node_type": "source",
"dbt_file_path": "models/base.yml",
"catalog_type": "BASE TABLE"
"catalog_type": "BASE TABLE",
"manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json",
"manifest_version": "0.19.1",
"catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json",
"catalog_version": "0.19.1"
},
"externalUrl": null,
"description": null,
Expand Down
Loading

0 comments on commit 3701931

Please sign in to comment.