Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Add option for disabling ownership extraction #11970

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@
"displayName": "Dremio",
"description": "Import Spaces, Sources, Tables and statistics from Dremio.",
"docsUrl": "https://datahubproject.io/docs/metadata-ingestion/",
"recipe": "source:\n type: dremio\n config:\n # Coordinates\n hostname: null\n port: null\n #true if https, otherwise false\n tls: true\n\n #For cloud instance\n #is_dremio_cloud: True\n #dremio_cloud_project_id: <project_id>\n\n #Credentials with personal access token\n authentication_method: PAT\n password: pass\n\n #Or Credentials with basic auth\n #authentication_method: password\n #username: null\n #password: null\n\n stateful_ingestion:\n enabled: true"
"recipe": "source:\n type: dremio\n config:\n # Coordinates\n hostname: null\n port: null\n #true if https, otherwise false\n tls: true\n\n #For cloud instance\n #is_dremio_cloud: True\n #dremio_cloud_project_id: <project_id>\n\n #Credentials with personal access token\n authentication_method: PAT\n password: pass\n\n #Or Credentials with basic auth\n #authentication_method: password\n #username: null\n #password: null\n\n ingest_owner: true\n\n stateful_ingestion:\n enabled: true"
},
{
"urn": "urn:li:dataPlatform:cassandra",
Expand Down
2 changes: 2 additions & 0 deletions metadata-ingestion/docs/sources/dremio/dremio_recipe.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ source:

include_query_lineage: True

ingest_owner: true

#Optional
source_mappings:
- platform: s3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def __init__(
platform: str,
ui_url: str,
env: str,
ingest_owner: bool,
domain: Optional[str] = None,
platform_instance: Optional[str] = None,
):
Expand All @@ -150,6 +151,7 @@ def __init__(
self.env = env
self.domain = domain
self.ui_url = ui_url
self.ingest_owner = ingest_owner

def get_container_key(
self, name: Optional[str], path: Optional[List[str]]
Expand Down Expand Up @@ -426,21 +428,23 @@ def _create_external_url(self, dataset: DremioDataset) -> str:
return f'{self.ui_url}/{container_type}/{dataset_url_path}"{dataset.resource_name}"'

def _create_ownership(self, dataset: DremioDataset) -> Optional[OwnershipClass]:
if not dataset.owner:
return None
owner = (
make_user_urn(dataset.owner)
if dataset.owner_type == "USER"
else make_group_urn(dataset.owner)
)
return OwnershipClass(
owners=[
OwnerClass(
owner=owner,
type=OwnershipTypeClass.TECHNICAL_OWNER,
)
]
)
if self.ingest_owner and dataset.owner:
owner_urn = (
make_user_urn(dataset.owner)
if dataset.owner_type == "USER"
else make_group_urn(dataset.owner)
)
ownership: OwnershipClass = OwnershipClass(
owners=[
OwnerClass(
owner=owner_urn,
type=OwnershipTypeClass.TECHNICAL_OWNER,
)
]
)
return ownership

return None

def _create_glossary_terms(self, entity: DremioDataset) -> GlossaryTermsClass:
return GlossaryTermsClass(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,3 +174,8 @@ def is_profiling_enabled(self) -> bool:
default=False,
description="Whether to include query-based lineage information.",
)

ingest_owner: bool = Field(
default=True,
description="Ingest Owner from source. This will override Owner info entered from UI",
)
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ def __init__(self, config: DremioSourceConfig, ctx: PipelineContext):
self.dremio_aspects = DremioAspects(
platform=self.get_platform(),
domain=self.config.domain,
ingest_owner=self.config.ingest_owner,
platform_instance=self.config.platform_instance,
env=self.config.env,
ui_url=dremio_api.ui_url,
Expand Down Expand Up @@ -569,11 +570,14 @@ def process_query(self, query: DremioQuery) -> None:
)

# Add observed query

self.sql_parsing_aggregator.add_observed_query(
ObservedQuery(
query=query.query,
timestamp=query.submitted_ts,
user=CorpUserUrn(username=query.username),
user=CorpUserUrn(username=query.username)
if self.config.ingest_owner
else None,
default_db=self.default_db,
)
)
Expand Down
Loading