Enable schema autodetect if no explicit schema
plamut committed Aug 22, 2019
1 parent 197a0be commit 919d30e
Showing 3 changed files with 40 additions and 1 deletion.
3 changes: 3 additions & 0 deletions bigquery/google/cloud/bigquery/client.py
@@ -1649,6 +1649,9 @@ def load_table_from_json(
        job_config = copy.deepcopy(job_config)
        job_config.source_format = job.SourceFormat.NEWLINE_DELIMITED_JSON

        if job_config.schema is None:
            job_config.autodetect = True

        if project is None:
            project = self.project

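For context, here is a minimal usage sketch of what this change enables, assuming a default-configured client; the project, dataset, and table names below are hypothetical. Calling load_table_from_json with no job_config (or with one whose schema is None) now submits a load job with schema autodetection enabled.

from google.cloud import bigquery

client = bigquery.Client()

rows = [
    {"name": "John", "age": 18, "birthday": "2001-10-15", "is_awesome": False},
    {"name": "Chuck", "age": 79, "birthday": "1940-03-10", "is_awesome": True},
]

# No explicit schema is supplied, so the client now sets
# job_config.autodetect = True before starting the load job.
load_job = client.load_table_from_json(rows, "my-project.my_dataset.my_table")
load_job.result()  # wait for the load job to complete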
36 changes: 35 additions & 1 deletion bigquery/tests/system.py
@@ -915,7 +915,6 @@ def test_load_table_from_json_basic_use(self):
            {"name": "Chuck", "age": 79, "birthday": "1940-03-10", "is_awesome": True},
        ]

        job_config = bigquery.LoadJobConfig(schema=table_schema)
        dataset_id = _make_dataset_id("bq_system_test")
        self.temp_dataset(dataset_id)
        table_id = "{}.{}.load_table_from_json_basic_use".format(
@@ -939,6 +938,41 @@ def test_load_table_from_json_basic_use(self):
        self.assertEqual(tuple(table.schema), table_schema)
        self.assertEqual(table.num_rows, 2)

    def test_load_table_from_json_schema_autodetect(self):
        # Use a schema with NULLABLE fields, because schema autodetection
        # defaults to the NULLABLE field mode.
        table_schema = (
            bigquery.SchemaField("name", "STRING", mode="NULLABLE"),
            bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
            bigquery.SchemaField("birthday", "DATE", mode="NULLABLE"),
            bigquery.SchemaField("is_awesome", "BOOLEAN", mode="NULLABLE"),
        )

        json_rows = [
            {"name": "John", "age": 18, "birthday": "2001-10-15", "is_awesome": False},
            {"name": "Chuck", "age": 79, "birthday": "1940-03-10", "is_awesome": True},
        ]

        dataset_id = _make_dataset_id("bq_system_test")
        self.temp_dataset(dataset_id)
        table_id = "{}.{}.load_table_from_json_schema_autodetect".format(
            Config.CLIENT.project, dataset_id
        )

        # Create the table before loading so that schema mismatch errors are
        # identified.
        table = retry_403(Config.CLIENT.create_table)(
            Table(table_id, schema=table_schema)
        )
        self.to_delete.insert(0, table)

        load_job = Config.CLIENT.load_table_from_json(json_rows, table_id)
        load_job.result()

        table = Config.CLIENT.get_table(table)
        self.assertEqual(tuple(table.schema), table_schema)
        self.assertEqual(table.num_rows, 2)

    def test_load_avro_from_uri_then_dump_table(self):
        from google.cloud.bigquery.job import CreateDisposition
        from google.cloud.bigquery.job import SourceFormat
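By contrast, passing an explicit schema leaves autodetect alone, since the new branch in client.py only triggers when job_config.schema is None. A sketch of that path, again with a hypothetical table name and an assumed default client:

from google.cloud import bigquery

client = bigquery.Client()

# An explicit schema means job_config.schema is not None, so the client
# does not enable autodetect and the provided schema is used as given.
job_config = bigquery.LoadJobConfig(
    schema=[
        bigquery.SchemaField("name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
)
rows = [{"name": "John", "age": 18}]
load_job = client.load_table_from_json(
    rows, "my-project.my_dataset.my_table", job_config=job_config
)
load_job.result()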
2 changes: 2 additions & 0 deletions bigquery/tests/unit/test_client.py
@@ -5615,6 +5615,7 @@ def test_load_table_from_json_basic_use(self):
        sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
        assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON
        assert sent_config.schema is None
        assert sent_config.autodetect

    def test_load_table_from_json_non_default_args(self):
        from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
@@ -5658,6 +5659,7 @@ def test_load_table_from_json_non_default_args(self):
        assert job_config.source_format is None  # the original was not modified
        assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON
        assert sent_config.schema is None
        assert sent_config.autodetect

    # Low-level tests

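The assertions above inspect the job config that load_table_from_json forwards to load_table_from_file through a mock. A standalone sketch of that pattern follows; the client argument and row payload are illustrative stand-ins, not the repository's actual test fixtures.

from unittest import mock

from google.cloud.bigquery import job


def test_autodetect_enabled_when_schema_missing(client):
    # Intercept the lower-level upload helper and capture the job_config
    # that load_table_from_json builds and forwards to it.
    with mock.patch.object(type(client), "load_table_from_file") as load_table_from_file:
        client.load_table_from_json(
            [{"name": "John", "age": 18}], "my-project.my_dataset.my_table"
        )

    # mock_calls[0][2] is the keyword-argument dict of the first recorded call.
    sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
    assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON
    assert sent_config.autodetect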
