From 6a7d1cc2bdc70d7e6e8f3b4f89d5d34bb876f223 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 26 Dec 2024 21:34:31 -0500 Subject: [PATCH] feat(ingest/dbt): support "Explore" page in dbt cloud (#12223) --- docs/how/updating-datahub.md | 1 + .../src/datahub/ingestion/source/dbt/dbt_cloud.py | 13 ++++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index a742ebe0cd8968..d6620fde0bf794 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -42,6 +42,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe - #12077: `Kafka` source no longer ingests schemas from schema registry as separate entities by default, set `ingest_schemas_as_entities` to `true` to ingest them - OpenAPI Update: PIT Keep Alive parameter added to scroll. NOTE: This parameter requires the `pointInTimeCreationEnabled` feature flag to be enabled and the `elasticSearch.implementation` configuration to be `elasticsearch`. This feature is not supported for OpenSearch at this time and the parameter will not be respected without both of these set. - OpenAPI Update 2: Previously there was an incorrectly marked parameter named `sort` on the generic list entities endpoint for v3. This parameter is deprecated and only supports a single string value while the documentation indicates it supports a list of strings. This documentation error has been fixed and the correct field, `sortCriteria`, is now documented which supports a list of strings. +- #12223: For dbt Cloud ingestion, the "View in dbt" link will point at the "Explore" page in the dbt Cloud UI. You can revert to the old behavior of linking to the dbt Cloud IDE by setting `external_url_mode: ide`. 
### Breaking Changes diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py index 66c5ef7179af41..5042f6d69b261a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py @@ -1,7 +1,7 @@ import logging from datetime import datetime from json import JSONDecodeError -from typing import Dict, List, Optional, Tuple +from typing import Dict, List, Literal, Optional, Tuple from urllib.parse import urlparse import dateutil.parser @@ -62,6 +62,11 @@ class DBTCloudConfig(DBTCommonConfig): description="The ID of the run to ingest metadata from. If not specified, we'll default to the latest run.", ) + external_url_mode: Literal["explore", "ide"] = Field( + default="explore", + description='Where should the "View in dbt" link point to - either the "Explore" UI or the dbt Cloud IDE', + ) + @root_validator(pre=True) def set_metadata_endpoint(cls, values: dict) -> dict: if values.get("access_url") and not values.get("metadata_endpoint"): @@ -527,5 +532,7 @@ def _parse_into_dbt_column( ) def get_external_url(self, node: DBTNode) -> Optional[str]: - # TODO: Once dbt Cloud supports deep linking to specific files, we can use that. - return f"{self.config.access_url}/develop/{self.config.account_id}/projects/{self.config.project_id}" + if self.config.external_url_mode == "explore": + return f"{self.config.access_url}/explore/{self.config.account_id}/projects/{self.config.project_id}/environments/production/details/{node.dbt_name}" + else: + return f"{self.config.access_url}/develop/{self.config.account_id}/projects/{self.config.project_id}"