From 0a8cc6461838a061aec0331ff1823c336b79803b Mon Sep 17 00:00:00 2001 From: Tsotne Tabidze Date: Thu, 20 May 2021 12:30:48 -0700 Subject: [PATCH 1/4] Don't create bigquery dataset if dataset field is defined in config Signed-off-by: Tsotne Tabidze --- .../feast/infra/offline_stores/bigquery.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/bigquery.py b/sdk/python/feast/infra/offline_stores/bigquery.py index dd991f864b..18e63a7901 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery.py +++ b/sdk/python/feast/infra/offline_stores/bigquery.py @@ -102,8 +102,15 @@ def get_historical_features( assert isinstance(config.offline_store, BigQueryOfflineStoreConfig) + # We should create a new dataset if the dataset name was not overridden in the config + should_create_dataset = "dataset" not in config.offline_store.__fields_set__ + table_id = _upload_entity_df_into_bigquery( - config.project, config.offline_store.dataset, entity_df, client, + config.project, + config.offline_store.dataset, + should_create_dataset, + entity_df, + client, ) entity_df_sql_table = f"`{table_id}`" else: @@ -200,15 +207,17 @@ class FeatureViewQueryContext: entity_selections: List[str] -def _upload_entity_df_into_bigquery(project, dataset_name, entity_df, client) -> str: +def _upload_entity_df_into_bigquery( + project, dataset_name, should_create_dataset, entity_df, client +) -> str: """Uploads a Pandas entity dataframe into a BigQuery table and returns a reference to the resulting table""" # First create the BigQuery dataset if it doesn't exist dataset = bigquery.Dataset(f"{client.project}.{dataset_name}") dataset.location = "US" - client.create_dataset( - dataset, exists_ok=True - ) # TODO: Consider moving this to apply or BigQueryOfflineStore + + if should_create_dataset: + client.create_dataset(dataset, exists_ok=True) # Drop the index so that we dont have unnecessary columns entity_df.reset_index(drop=True, inplace=True) From f2f22a1f96bf70a33bb6e0a0a620f841bcba6b09 Mon Sep 17 00:00:00 2001 From: Tsotne Tabidze Date: Thu, 20 May 2021 15:50:33 -0700 Subject: [PATCH 2/4] Try getting the dataset before creating it Signed-off-by: Tsotne Tabidze --- .../feast/infra/offline_stores/bigquery.py | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/bigquery.py b/sdk/python/feast/infra/offline_stores/bigquery.py index 18e63a7901..93db23e160 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery.py +++ b/sdk/python/feast/infra/offline_stores/bigquery.py @@ -20,6 +20,7 @@ from feast.repo_config import BigQueryOfflineStoreConfig, RepoConfig try: + from google.api_core.exceptions import NotFound from google.auth.exceptions import DefaultCredentialsError from google.cloud import bigquery @@ -102,15 +103,8 @@ def get_historical_features( assert isinstance(config.offline_store, BigQueryOfflineStoreConfig) - # We should create a new dataset if the dataset name was not overridden in the config - should_create_dataset = "dataset" not in config.offline_store.__fields_set__ - table_id = _upload_entity_df_into_bigquery( - config.project, - config.offline_store.dataset, - should_create_dataset, - entity_df, - client, + config.project, config.offline_store.dataset, entity_df, client ) entity_df_sql_table = f"`{table_id}`" else: @@ -207,17 +201,20 @@ class FeatureViewQueryContext: entity_selections: List[str] -def _upload_entity_df_into_bigquery( - project, dataset_name, should_create_dataset, entity_df, client -) -> str: +def _upload_entity_df_into_bigquery(project, dataset_name, entity_df, client) -> str: """Uploads a Pandas entity dataframe into a BigQuery table and returns a reference to the resulting table""" # First create the BigQuery dataset if it doesn't exist dataset = bigquery.Dataset(f"{client.project}.{dataset_name}") dataset.location = "US" - if should_create_dataset: - client.create_dataset(dataset, exists_ok=True) + client.get_dataset(dataset) + + try: + client.get_dataset(dataset) + except NotFound: + # Only create the dataset if it does not exist + client.create_dataset(dataset) # Drop the index so that we dont have unnecessary columns entity_df.reset_index(drop=True, inplace=True) From 2837dd9613ceb360279a24237afba1726b2c9f67 Mon Sep 17 00:00:00 2001 From: Tsotne Tabidze Date: Thu, 20 May 2021 15:55:23 -0700 Subject: [PATCH 3/4] Remove extra client.get_dataset Signed-off-by: Tsotne Tabidze --- sdk/python/feast/infra/offline_stores/bigquery.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/bigquery.py b/sdk/python/feast/infra/offline_stores/bigquery.py index 93db23e160..12938b32e2 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery.py +++ b/sdk/python/feast/infra/offline_stores/bigquery.py @@ -208,8 +208,6 @@ def _upload_entity_df_into_bigquery(project, dataset_name, entity_df, client) -> dataset = bigquery.Dataset(f"{client.project}.{dataset_name}") dataset.location = "US" - client.get_dataset(dataset) - try: client.get_dataset(dataset) except NotFound: From 43f4c081fb85ad6d780043d578e71aa435fcf257 Mon Sep 17 00:00:00 2001 From: Tsotne Tabidze Date: Thu, 20 May 2021 16:10:56 -0700 Subject: [PATCH 4/4] Add exists_ok=True back Signed-off-by: Tsotne Tabidze --- sdk/python/feast/infra/offline_stores/bigquery.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/python/feast/infra/offline_stores/bigquery.py b/sdk/python/feast/infra/offline_stores/bigquery.py index 12938b32e2..1d931b285f 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery.py +++ b/sdk/python/feast/infra/offline_stores/bigquery.py @@ -212,7 +212,7 @@ def _upload_entity_df_into_bigquery(project, dataset_name, entity_df, client) -> client.get_dataset(dataset) except NotFound: # Only create the dataset if it does not exist - client.create_dataset(dataset) + client.create_dataset(dataset, exists_ok=True) # Drop the index so that we dont have unnecessary columns entity_df.reset_index(drop=True, inplace=True)