Enable schema autodetect if no explicit schema
plamut committed Aug 22, 2019
1 parent 197a0be commit 919d30e
Showing 3 changed files with 40 additions and 1 deletion.
3 changes: 3 additions & 0 deletions bigquery/google/cloud/bigquery/client.py
@@ -1649,6 +1649,9 @@ def load_table_from_json(
        job_config = copy.deepcopy(job_config)
        job_config.source_format = job.SourceFormat.NEWLINE_DELIMITED_JSON

        if job_config.schema is None:
            job_config.autodetect = True

        if project is None:
            project = self.project

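For context, here is a minimal usage sketch of what this change enables, assuming a default-configured client; the project, dataset, and table names below are hypothetical. Calling load_table_from_json with no job_config (or with one whose schema is None) now submits a load job with schema autodetection enabled.

from google.cloud import bigquery

client = bigquery.Client()

rows = [
    {"name": "John", "age": 18, "birthday": "2001-10-15", "is_awesome": False},
    {"name": "Chuck", "age": 79, "birthday": "1940-03-10", "is_awesome": True},
]

# No explicit schema is supplied, so the client now sets
# job_config.autodetect = True before starting the load job.
load_job = client.load_table_from_json(rows, "my-project.my_dataset.my_table")
load_job.result()  # wait for the load job to complete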
36 changes: 35 additions & 1 deletion bigquery/tests/system.py
@@ -915,7 +915,6 @@ def test_load_table_from_json_basic_use(self):
            {"name": "Chuck", "age": 79, "birthday": "1940-03-10", "is_awesome": True},
        ]

        job_config = bigquery.LoadJobConfig(schema=table_schema)
        dataset_id = _make_dataset_id("bq_system_test")
        self.temp_dataset(dataset_id)
        table_id = "{}.{}.load_table_from_json_basic_use".format(
@@ -939,6 +938,41 @@ def test_load_table_from_json_basic_use(self):
        self.assertEqual(tuple(table.schema), table_schema)
        self.assertEqual(table.num_rows, 2)

    def test_load_table_from_json_schema_autodetect(self):
        # Use a schema with NULLABLE fields, because schema autodetection
        # defaults to the NULLABLE field mode.
        table_schema = (
            bigquery.SchemaField("name", "STRING", mode="NULLABLE"),
            bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
            bigquery.SchemaField("birthday", "DATE", mode="NULLABLE"),
            bigquery.SchemaField("is_awesome", "BOOLEAN", mode="NULLABLE"),
        )

        json_rows = [
            {"name": "John", "age": 18, "birthday": "2001-10-15", "is_awesome": False},
            {"name": "Chuck", "age": 79, "birthday": "1940-03-10", "is_awesome": True},
        ]

        dataset_id = _make_dataset_id("bq_system_test")
        self.temp_dataset(dataset_id)
        table_id = "{}.{}.load_table_from_json_schema_autodetect".format(
            Config.CLIENT.project, dataset_id
        )

        # Create the table before loading so that schema mismatch errors are
        # identified.
        table = retry_403(Config.CLIENT.create_table)(
            Table(table_id, schema=table_schema)
        )
        self.to_delete.insert(0, table)

        load_job = Config.CLIENT.load_table_from_json(json_rows, table_id)
        load_job.result()

        table = Config.CLIENT.get_table(table)
        self.assertEqual(tuple(table.schema), table_schema)
        self.assertEqual(table.num_rows, 2)

    def test_load_avro_from_uri_then_dump_table(self):
        from google.cloud.bigquery.job import CreateDisposition
        from google.cloud.bigquery.job import SourceFormat
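By contrast, passing an explicit schema leaves autodetect alone, since the new branch in client.py only triggers when job_config.schema is None. A sketch of that path, again with a hypothetical table name and an assumed default client:

from google.cloud import bigquery

client = bigquery.Client()

# An explicit schema means job_config.schema is not None, so the client
# does not enable autodetect and the provided schema is used as given.
job_config = bigquery.LoadJobConfig(
    schema=[
        bigquery.SchemaField("name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
)
rows = [{"name": "John", "age": 18}]
load_job = client.load_table_from_json(
    rows, "my-project.my_dataset.my_table", job_config=job_config
)
load_job.result()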
2 changes: 2 additions & 0 deletions bigquery/tests/unit/test_client.py
@@ -5615,6 +5615,7 @@ def test_load_table_from_json_basic_use(self):
        sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
        assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON
        assert sent_config.schema is None
        assert sent_config.autodetect

    def test_load_table_from_json_non_default_args(self):
        from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
@@ -5658,6 +5659,7 @@ def test_load_table_from_json_non_default_args(self):
        assert job_config.source_format is None  # the original was not modified
        assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON
        assert sent_config.schema is None
        assert sent_config.autodetect

    # Low-level tests

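The assertions above inspect the job config that load_table_from_json forwards to load_table_from_file through a mock. A standalone sketch of that pattern follows; the client argument and row payload are illustrative stand-ins, not the repository's actual test fixtures.

from unittest import mock

from google.cloud.bigquery import job


def test_autodetect_enabled_when_schema_missing(client):
    # Intercept the lower-level upload helper and capture the job_config
    # that load_table_from_json builds and forwards to it.
    with mock.patch.object(type(client), "load_table_from_file") as load_table_from_file:
        client.load_table_from_json(
            [{"name": "John", "age": 18}], "my-project.my_dataset.my_table"
        )

    # mock_calls[0][2] is the keyword-argument dict of the first recorded call.
    sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
    assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON
    assert sent_config.autodetect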
