Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Add option for disabling ownership extraction #11970

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@
"displayName": "Dremio",
"description": "Import Spaces, Sources, Tables and statistics from Dremio.",
"docsUrl": "https://datahubproject.io/docs/metadata-ingestion/",
"recipe": "source:\n type: dremio\n config:\n # Coordinates\n hostname: null\n port: null\n #true if https, otherwise false\n tls: true\n\n #For cloud instance\n #is_dremio_cloud: True\n #dremio_cloud_project_id: <project_id>\n\n #Credentials with personal access token\n authentication_method: PAT\n password: pass\n\n #Or Credentials with basic auth\n #authentication_method: password\n #username: null\n #password: null\n\n stateful_ingestion:\n enabled: true"
"recipe": "source:\n type: dremio\n config:\n # Coordinates\n hostname: null\n port: null\n #true if https, otherwise false\n tls: true\n\n #For cloud instance\n #is_dremio_cloud: True\n #dremio_cloud_project_id: <project_id>\n\n #Credentials with personal access token\n authentication_method: PAT\n password: pass\n\n #Or Credentials with basic auth\n #authentication_method: password\n #username: null\n #password: null\n\n ingest_owner: true\n\n stateful_ingestion:\n enabled: true"
},
{
"urn": "urn:li:dataPlatform:cassandra",
Expand Down
2 changes: 2 additions & 0 deletions metadata-ingestion/docs/sources/dremio/dremio_recipe.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ source:

include_query_lineage: True

ingest_owner: true

#Optional
source_mappings:
- platform: s3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def __init__(
platform: str,
ui_url: str,
env: str,
ingest_owner: bool,
domain: Optional[str] = None,
platform_instance: Optional[str] = None,
):
Expand All @@ -150,6 +151,7 @@ def __init__(
self.env = env
self.domain = domain
self.ui_url = ui_url
self.ingest_owner = ingest_owner

def get_container_key(
self, name: Optional[str], path: Optional[List[str]]
Expand Down Expand Up @@ -426,21 +428,23 @@ def _create_external_url(self, dataset: DremioDataset) -> str:
return f'{self.ui_url}/{container_type}/{dataset_url_path}"{dataset.resource_name}"'

def _create_ownership(self, dataset: DremioDataset) -> Optional[OwnershipClass]:
if not dataset.owner:
return None
owner = (
make_user_urn(dataset.owner)
if dataset.owner_type == "USER"
else make_group_urn(dataset.owner)
)
return OwnershipClass(
owners=[
OwnerClass(
owner=owner,
type=OwnershipTypeClass.TECHNICAL_OWNER,
)
]
)
if self.ingest_owner and dataset.owner:
owner_urn = (
make_user_urn(dataset.owner)
if dataset.owner_type == "USER"
else make_group_urn(dataset.owner)
)
ownership: OwnershipClass = OwnershipClass(
owners=[
OwnerClass(
owner=owner_urn,
type=OwnershipTypeClass.TECHNICAL_OWNER,
)
]
)
return ownership

return None

def _create_glossary_terms(self, entity: DremioDataset) -> GlossaryTermsClass:
return GlossaryTermsClass(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,3 +174,8 @@ def is_profiling_enabled(self) -> bool:
default=False,
description="Whether to include query-based lineage information.",
)

ingest_owner: bool = Field(
default=True,
description="Ingest Owner from source. This will override Owner info entered from UI",
)
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ class DremioSource(StatefulIngestionSourceBase):
- Ownership and Glossary Terms:
- Metadata related to ownership of datasets, extracted from Dremio’s ownership model.
- Glossary terms and business metadata associated with datasets, providing additional context to the data.
- Note: Ownership information is only available in the Cloud and Enterprise editions; it is not available in the Community edition.

- Optional SQL Profiling (if enabled):
- Table, row, and column statistics can be profiled and ingested via optional SQL queries.
Expand All @@ -123,6 +124,7 @@ def __init__(self, config: DremioSourceConfig, ctx: PipelineContext):
self.dremio_aspects = DremioAspects(
platform=self.get_platform(),
domain=self.config.domain,
ingest_owner=self.config.ingest_owner,
platform_instance=self.config.platform_instance,
env=self.config.env,
ui_url=dremio_api.ui_url,
Expand Down
Loading