From c6acba1834030df9d07dfc7524723e9e16ae9ca3 Mon Sep 17 00:00:00 2001 From: Nathan Hadfield Date: Tue, 11 Nov 2025 14:06:24 +0000 Subject: [PATCH] fix: add required arguments when creating an external table --- .../google/cloud/transfers/gcs_to_bigquery.py | 2 + .../cloud/transfers/test_gcs_to_bigquery.py | 47 +++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/providers/google/src/airflow/providers/google/cloud/transfers/gcs_to_bigquery.py b/providers/google/src/airflow/providers/google/cloud/transfers/gcs_to_bigquery.py index dd1fb8e59ba8a..97a8efbd43987 100644 --- a/providers/google/src/airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +++ b/providers/google/src/airflow/providers/google/cloud/transfers/gcs_to_bigquery.py @@ -593,6 +593,8 @@ def _create_external_table(self): self.hook.create_table( table_resource=table_obj_api_repr, project_id=self.project_id or self.hook.project_id, + dataset_id=table.dataset_id, + table_id=table.table_id, location=self.location, exists_ok=True, ) diff --git a/providers/google/tests/unit/google/cloud/transfers/test_gcs_to_bigquery.py b/providers/google/tests/unit/google/cloud/transfers/test_gcs_to_bigquery.py index ad6f3a08a6f5b..4383ce7ddc96e 100644 --- a/providers/google/tests/unit/google/cloud/transfers/test_gcs_to_bigquery.py +++ b/providers/google/tests/unit/google/cloud/transfers/test_gcs_to_bigquery.py @@ -123,6 +123,8 @@ def test_max_value_external_table_should_execute_successfully(self, hook): exists_ok=True, location=None, project_id=JOB_PROJECT_ID, + dataset_id=DATASET, + table_id=TABLE, table_resource={ "tableReference": {"projectId": PROJECT_ID, "datasetId": DATASET, "tableId": TABLE}, "labels": {}, @@ -155,6 +157,33 @@ def test_max_value_external_table_should_execute_successfully(self, hook): project_id=JOB_PROJECT_ID, ) + @mock.patch(GCS_TO_BQ_PATH.format("BigQueryHook")) + def test_external_table_explicitly_passes_dataset_and_table_ids(self, hook): + hook.return_value.insert_job.side_effect = [ + MagicMock(job_id=REAL_JOB_ID, error_result=False), + REAL_JOB_ID, + ] + hook.return_value.generate_job_id.return_value = REAL_JOB_ID + hook.return_value.split_tablename.return_value = (PROJECT_ID, DATASET, TABLE) + + def _validate_create_table(**kwargs): + assert kwargs["dataset_id"] == DATASET + assert kwargs["table_id"] == TABLE + + hook.return_value.create_table.side_effect = _validate_create_table + + operator = GCSToBigQueryOperator( + task_id=TASK_ID, + bucket=TEST_BUCKET, + source_objects=TEST_SOURCE_OBJECTS, + destination_project_dataset_table=TEST_EXPLICIT_DEST, + schema_fields=SCHEMA_FIELDS, + external_table=True, + project_id=JOB_PROJECT_ID, + ) + + operator.execute(context=MagicMock()) + @mock.patch(GCS_TO_BQ_PATH.format("BigQueryHook")) def test_max_value_without_external_table_should_execute_successfully(self, hook): hook.return_value.insert_job.side_effect = [ @@ -333,6 +362,8 @@ def test_labels_external_table_should_execute_successfully(self, hook): exists_ok=True, location=None, project_id=JOB_PROJECT_ID, + dataset_id=DATASET, + table_id=TABLE, table_resource={ "tableReference": {"projectId": PROJECT_ID, "datasetId": DATASET, "tableId": TABLE}, "labels": LABELS, @@ -434,6 +465,8 @@ def test_description_external_table_should_execute_successfully(self, hook): exists_ok=True, location=None, project_id=JOB_PROJECT_ID, + dataset_id=DATASET, + table_id=TABLE, table_resource={ "tableReference": {"projectId": PROJECT_ID, "datasetId": DATASET, "tableId": TABLE}, "labels": {}, @@ -536,6 +569,8 @@ def test_source_objs_as_list_external_table_should_execute_successfully(self, ho exists_ok=True, location=None, project_id=JOB_PROJECT_ID, + dataset_id=DATASET, + table_id=TABLE, table_resource={ "tableReference": {"projectId": PROJECT_ID, "datasetId": DATASET, "tableId": TABLE}, "labels": {}, @@ -640,6 +675,8 @@ def test_source_objs_as_string_external_table_should_execute_successfully(self, exists_ok=True, location=None, project_id=JOB_PROJECT_ID, + dataset_id=DATASET, + table_id=TABLE, table_resource={ "tableReference": {"projectId": PROJECT_ID, "datasetId": DATASET, "tableId": TABLE}, "labels": {}, @@ -745,6 +782,8 @@ def test_schema_obj_external_table_should_execute_successfully(self, bq_hook, gc exists_ok=True, location=None, project_id=JOB_PROJECT_ID, + dataset_id=DATASET, + table_id=TABLE, table_resource={ "tableReference": {"projectId": PROJECT_ID, "datasetId": DATASET, "tableId": TABLE}, "labels": {}, @@ -849,6 +888,8 @@ def test_autodetect_none_external_table_should_execute_successfully(self, hook): exists_ok=True, location=None, project_id=JOB_PROJECT_ID, + dataset_id=DATASET, + table_id=TABLE, table_resource={ "tableReference": {"projectId": PROJECT_ID, "datasetId": DATASET, "tableId": TABLE}, "labels": {}, @@ -1051,6 +1092,8 @@ def test_schema_fields_integer_scanner_external_table_should_execute_successfull exists_ok=True, location=None, project_id=JOB_PROJECT_ID, + dataset_id=DATASET, + table_id=TABLE, table_resource={ "tableReference": {"projectId": PROJECT_ID, "datasetId": DATASET, "tableId": TABLE}, "labels": {}, @@ -1235,6 +1278,8 @@ def test_schema_fields_external_table_should_execute_successfully(self, hook): exists_ok=True, location=None, project_id=JOB_PROJECT_ID, + dataset_id=DATASET, + table_id=TABLE, table_resource={ "tableReference": {"projectId": PROJECT_ID, "datasetId": DATASET, "tableId": TABLE}, "labels": {}, @@ -1616,6 +1661,8 @@ def test_external_table_should_accept_orc_source_format(self, hook): exists_ok=True, location=None, project_id=JOB_PROJECT_ID, + dataset_id=DATASET, + table_id=TABLE, table_resource={ "tableReference": { "projectId": PROJECT_ID,