Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(ingest/bigquery): support using table read permission without profiling #10699

Merged
merged 1 commit into from
Jun 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ def metadata_read_capability_test(
project_id=project_id,
dataset_name=result[0].name,
tables={},
with_data_read_permission=config.is_profiling_enabled(),
with_data_read_permission=config.have_table_data_read_permission,
)
if len(list(tables)) == 0:
return CapabilityReport(
Expand Down Expand Up @@ -1353,7 +1353,7 @@ def get_tables_for_dataset(
project_id,
dataset_name,
items_to_get,
with_data_read_permission=self.config.is_profiling_enabled(),
with_data_read_permission=self.config.have_table_data_read_permission,
)
items_to_get.clear()

Expand All @@ -1362,7 +1362,7 @@ def get_tables_for_dataset(
project_id,
dataset_name,
items_to_get,
with_data_read_permission=self.config.is_profiling_enabled(),
with_data_read_permission=self.config.have_table_data_read_permission,
)

self.report.metadata_extraction_sec[f"{project_id}.{dataset_name}"] = round(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,15 @@ class BigQueryV2Config(
description="Number of partitioned table queried in batch when getting metadata. This is a low level config property which should be touched with care. This restriction is needed because we query partitions system view which throws error if we try to touch too many tables.",
)

# Opt-in switch (off by default) for the alternative table-listing query.
# Per its description it needs permission to read table data; the
# `have_table_data_read_permission` property combines this flag with
# `is_profiling_enabled()`.
use_tables_list_query_v2: bool = Field(
default=False,
description="List tables using an improved query that extracts partitions and last modified timestamps more accurately. Requires the ability to read table data. Automatically enabled when profiling is enabled.",
)

@property
def have_table_data_read_permission(self) -> bool:
    """Tell whether table listing may rely on table-data read access.

    The result is the ``use_tables_list_query_v2`` setting when that is
    truthy; otherwise it is whatever ``is_profiling_enabled()`` returns.
    Profiling is only consulted when the explicit flag is not set.
    """
    explicit_opt_in = self.use_tables_list_query_v2
    if explicit_opt_in:
        return explicit_opt_in
    return self.is_profiling_enabled()

column_limit: int = Field(
default=300,
description="Maximum number of columns to process in a table. This is a low level config property which should be touched with care. This restriction is needed because excessively wide tables can result in failure to ingest the schema.",
Expand Down
Loading