Skip to content

Commit

Permalink
Only load fields that are needed from Google API for Big Query schema.
Browse files Browse the repository at this point in the history
  • Loading branch information
jezdez authored Sep 30, 2020
1 parent 963ee5f commit 3e76946
Showing 1 changed file with 34 additions and 18 deletions.
52 changes: 34 additions & 18 deletions redash/query_runner/big_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,39 +376,55 @@ def get_schema(self, get_stats=False):

service = self._get_bigquery_service()
project_id = self._get_project_id()
datasets = service.datasets().list(projectId=project_id).execute()
# get a list of Big Query datasets
datasets_request = service.datasets().list(
projectId=project_id,
fields="datasets/datasetReference/datasetId,nextPageToken",
)
datasets = []
while datasets_request:
# request datasets
datasets_response = datasets_request.execute()
# store results
datasets.extend(datasets_response.get("datasets", []))
# try loading next page
datasets_request = service.datasets().list_next(
datasets_request,
datasets_response,
)

schema = []
for dataset in datasets.get("datasets", []):
# load all tables for all datasets
for dataset in datasets:
dataset_id = dataset["datasetReference"]["datasetId"]
tables = (
service.tables()
.list(projectId=project_id, datasetId=dataset_id)
.execute()
tables_request = service.tables().list(
projectId=project_id,
datasetId=dataset_id,
fields="tables/tableReference/tableId,nextPageToken",
)
while True:
for table in tables.get("tables", []):
while tables_request:
# request tables with fields above
tables_response = tables_request.execute()
for table in tables_response.get("tables", []):
# load schema for given table
table_data = (
service.tables()
.get(
projectId=project_id,
datasetId=dataset_id,
tableId=table["tableReference"]["tableId"],
fields="id,schema",
)
.execute()
)
# build schema data with given table data
table_schema = self._get_columns_schema(table_data)
schema.append(table_schema)

next_token = tables.get("nextPageToken", None)
if next_token is None:
break

tables = (
service.tables()
.list(
projectId=project_id, datasetId=dataset_id, pageToken=next_token
)
.execute()
# try loading next page of results
tables_request = service.tables().list_next(
tables_request,
tables_response,
)

return schema
Expand Down

0 comments on commit 3e76946

Please sign in to comment.