Skip to content

Commit

Permalink
fix: make unicode characters work well in load_table_from_json (#865)
Browse files Browse the repository at this point in the history
Co-authored-by: Tim Swast <swast@google.com>
Co-authored-by: Tres Seaver <tseaver@palladion.com>
  • Loading branch information
3 people authored Aug 11, 2021
1 parent 519d99c commit ad9c802
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 1 deletion.
2 changes: 1 addition & 1 deletion google/cloud/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2762,7 +2762,7 @@ def load_table_from_json(

destination = _table_arg_to_table_ref(destination, default_project=self.project)

data_str = "\n".join(json.dumps(item) for item in json_rows)
data_str = "\n".join(json.dumps(item, ensure_ascii=False) for item in json_rows)
encoded_str = data_str.encode()
data_file = io.BytesIO(encoded_str)
return self.load_table_from_file(
Expand Down
36 changes: 36 additions & 0 deletions tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7775,6 +7775,42 @@ def test_load_table_from_json_w_invalid_job_config(self):
err_msg = str(exc.value)
assert "Expected an instance of LoadJobConfig" in err_msg

def test_load_table_from_json_unicode_emoji_data_case(self):
    """Check that a non-ASCII value is uploaded as raw UTF-8 JSON bytes.

    ``Client.load_table_from_file`` is patched out so nothing is sent; the
    test then inspects the file object that ``load_table_from_json`` passed
    to it and compares its contents with the expected UTF-8 payload.
    """
    from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES

    bank_emoji = "\U0001F3E6"
    rows = [{"emoji": bank_emoji}]
    client = self._make_client()

    patcher = mock.patch(
        "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
    )
    with patcher as fake_load:
        client.load_table_from_json(rows, self.TABLE_REF)

    # autospec=True patches the method on the class, so the instance shows
    # up as the first positional argument of the recorded call.
    fake_load.assert_called_once_with(
        client,
        mock.ANY,
        self.TABLE_REF,
        size=mock.ANY,
        num_retries=_DEFAULT_NUM_RETRIES,
        job_id=mock.ANY,
        job_id_prefix=None,
        location=client.location,
        project=client.project,
        job_config=mock.ANY,
        timeout=None,
    )

    # Each mock_calls entry is (name, args, kwargs); the data file is the
    # second positional argument, right after the client instance.
    recorded_call = fake_load.mock_calls[0]
    positional_args = recorded_call[1]
    uploaded_file = positional_args[1]

    # The emoji must be encoded exactly once: plain UTF-8 bytes inside the
    # JSON string, not an escaped or double-encoded representation.
    utf8_payload = b'{"emoji": "' + bank_emoji.encode("utf8") + b'"}'
    assert uploaded_file.getvalue() == utf8_payload

# Low-level tests

@classmethod
Expand Down

0 comments on commit ad9c802

Please sign in to comment.