Skip to content

Commit

Permalink
feat(ingest/bigquery): support using table read permission without pr…
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 authored and sleeperdeep committed Jun 25, 2024
1 parent a309260 commit c1e8671
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ def metadata_read_capability_test(
project_id=project_id,
dataset_name=result[0].name,
tables={},
with_data_read_permission=config.is_profiling_enabled(),
with_data_read_permission=config.have_table_data_read_permission,
)
if len(list(tables)) == 0:
return CapabilityReport(
Expand Down Expand Up @@ -1380,7 +1380,7 @@ def get_tables_for_dataset(
project_id,
dataset_name,
items_to_get,
with_data_read_permission=self.config.is_profiling_enabled(),
with_data_read_permission=self.config.have_table_data_read_permission,
)
items_to_get.clear()

Expand All @@ -1389,7 +1389,7 @@ def get_tables_for_dataset(
project_id,
dataset_name,
items_to_get,
with_data_read_permission=self.config.is_profiling_enabled(),
with_data_read_permission=self.config.have_table_data_read_permission,
)

self.report.metadata_extraction_sec[f"{project_id}.{dataset_name}"] = round(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,15 @@ class BigQueryV2Config(
description="Number of partitioned table queried in batch when getting metadata. This is a low level config property which should be touched with care. This restriction is needed because we query partitions system view which throws error if we try to touch too many tables.",
)

use_tables_list_query_v2: bool = Field(
default=False,
description="List tables using an improved query that extracts partitions and last modified timestamps more accurately. Requires the ability to read table data. Automatically enabled when profiling is enabled.",
)

@property
def have_table_data_read_permission(self) -> bool:
    """Whether table listing may use queries that need table data read access.

    Evaluates to the `use_tables_list_query_v2` setting when that setting
    is truthy; otherwise defers to `is_profiling_enabled()`, so enabling
    profiling turns this on as well.
    """
    explicit_opt_in = self.use_tables_list_query_v2
    if explicit_opt_in:
        # Opt-in flag wins; the profiling check is not evaluated at all.
        return explicit_opt_in
    return self.is_profiling_enabled()

column_limit: int = Field(
default=300,
description="Maximum number of columns to process in a table. This is a low level config property which should be touched with care. This restriction is needed because excessively wide tables can result in failure to ingest the schema.",
Expand Down

0 comments on commit c1e8671

Please sign in to comment.