From 919d30ed6414de22540046e23eb72e0836631806 Mon Sep 17 00:00:00 2001
From: Peter Lamut
Date: Thu, 22 Aug 2019 12:01:25 +0200
Subject: [PATCH] Enable schema autodetect if no explicit schema

load_table_from_json() now turns on schema autodetection for the load
job whenever the job config it sends carries no explicit schema.

Add a system test that loads JSON rows without passing a job config
into a pre-created table, and assert in the unit tests that the config
handed to load_table_from_file() has autodetect enabled.
---
NOTE(review): this patch was rebuilt from a copy in which newlines and
indentation had been collapsed. The indentation of the context lines
(in particular the copy.deepcopy line in client.py) is reconstructed;
verify it against the target tree or apply with
`git apply --ignore-whitespace`.

NOTE(review): the table id in the new system test was changed from
load_table_from_json_basic_use (copy-pasted from the neighbouring test)
to load_table_from_json_schema_autodetect, so the post-image blob id on
the bigquery/tests/system.py index line is no longer exact.

 bigquery/google/cloud/bigquery/client.py |  3 ++
 bigquery/tests/system.py                 | 36 +++++++++++++++++++++++-
 bigquery/tests/unit/test_client.py       |  2 ++
 3 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py
index 86061b0b23c8..f39a375833c4 100644
--- a/bigquery/google/cloud/bigquery/client.py
+++ b/bigquery/google/cloud/bigquery/client.py
@@ -1649,6 +1649,9 @@ def load_table_from_json(
             job_config = copy.deepcopy(job_config)
         job_config.source_format = job.SourceFormat.NEWLINE_DELIMITED_JSON
 
+        if job_config.schema is None:
+            job_config.autodetect = True
+
         if project is None:
             project = self.project
 
diff --git a/bigquery/tests/system.py b/bigquery/tests/system.py
index ab660280a1d8..84d38ac09e25 100644
--- a/bigquery/tests/system.py
+++ b/bigquery/tests/system.py
@@ -915,7 +915,6 @@ def test_load_table_from_json_basic_use(self):
             {"name": "Chuck", "age": 79, "birthday": "1940-03-10", "is_awesome": True},
         ]
 
-        job_config = bigquery.LoadJobConfig(schema=table_schema)
         dataset_id = _make_dataset_id("bq_system_test")
         self.temp_dataset(dataset_id)
         table_id = "{}.{}.load_table_from_json_basic_use".format(
@@ -939,6 +938,41 @@ def test_load_table_from_json_basic_use(self):
         self.assertEqual(tuple(table.schema), table_schema)
         self.assertEqual(table.num_rows, 2)
 
+    def test_load_table_from_json_schema_autodetect(self):
+        # Use schema with NULLABLE fields, because schema autodetection
+        # defaults to field mode NULLABLE.
+        table_schema = (
+            bigquery.SchemaField("name", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
+            bigquery.SchemaField("birthday", "DATE", mode="NULLABLE"),
+            bigquery.SchemaField("is_awesome", "BOOLEAN", mode="NULLABLE"),
+        )
+
+        json_rows = [
+            {"name": "John", "age": 18, "birthday": "2001-10-15", "is_awesome": False},
+            {"name": "Chuck", "age": 79, "birthday": "1940-03-10", "is_awesome": True},
+        ]
+
+        dataset_id = _make_dataset_id("bq_system_test")
+        self.temp_dataset(dataset_id)
+        table_id = "{}.{}.load_table_from_json_schema_autodetect".format(
+            Config.CLIENT.project, dataset_id
+        )
+
+        # Create the table before loading so that schema mismatch errors are
+        # identified.
+        table = retry_403(Config.CLIENT.create_table)(
+            Table(table_id, schema=table_schema)
+        )
+        self.to_delete.insert(0, table)
+
+        load_job = Config.CLIENT.load_table_from_json(json_rows, table_id)
+        load_job.result()
+
+        table = Config.CLIENT.get_table(table)
+        self.assertEqual(tuple(table.schema), table_schema)
+        self.assertEqual(table.num_rows, 2)
+
     def test_load_avro_from_uri_then_dump_table(self):
         from google.cloud.bigquery.job import CreateDisposition
         from google.cloud.bigquery.job import SourceFormat
diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py
index cb86561c67b4..87081d10ad34 100644
--- a/bigquery/tests/unit/test_client.py
+++ b/bigquery/tests/unit/test_client.py
@@ -5615,6 +5615,7 @@ def test_load_table_from_json_basic_use(self):
         sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
         assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON
         assert sent_config.schema is None
+        assert sent_config.autodetect
 
     def test_load_table_from_json_non_default_args(self):
         from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
@@ -5658,6 +5659,7 @@ def test_load_table_from_json_non_default_args(self):
         assert job_config.source_format is None  # the original was not modified
         assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON
         assert sent_config.schema is None
+        assert sent_config.autodetect
 
     # Low-level tests
 