Commit: boost coverage

tswast committed Dec 21, 2021
1 parent 6704991 commit a9075df

Showing 3 changed files with 64 additions and 1 deletion.
7 changes: 6 additions & 1 deletion pandas_gbq/load.py
@@ -185,6 +185,11 @@ def load_csv_from_file(
chunksize: Optional[int],
schema: Optional[Dict[str, Any]],
):
"""Manually encode a DataFrame to CSV and use the buffer in a load job.
This method is needed for writing with google-cloud-bigquery versions that
don't implement load_table_from_dataframe with the CSV serialization format.
"""
if schema is None:
schema = pandas_gbq.schema.generate_bq_schema(dataframe)

@@ -203,7 +208,7 @@ def load_chunk(chunk, job_config):
finally:
chunk_buffer.close()

return load_csv(dataframe, chunksize, bq_schema, load_chunk,)
return load_csv(dataframe, chunksize, bq_schema, load_chunk)


def load_chunks(
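For readers skimming the diff, here is a minimal sketch of the CSV-buffer load path the new docstring describes. The helper name `load_csv_chunks_sketch` and the chunking details are assumptions; the real implementation is split across `load_csv` and `load_chunk` in pandas_gbq/load.py:

```python
import io
from typing import Optional

import google.cloud.bigquery
import pandas


def load_csv_chunks_sketch(
    client: google.cloud.bigquery.Client,
    dataframe: pandas.DataFrame,
    destination: google.cloud.bigquery.TableReference,
    chunksize: Optional[int],
    bq_schema,
):
    """Hypothetical sketch: encode each chunk to CSV and load it from a buffer."""
    job_config = google.cloud.bigquery.LoadJobConfig()
    job_config.source_format = google.cloud.bigquery.SourceFormat.CSV
    job_config.schema = bq_schema

    if chunksize is None:
        chunks = [dataframe]
    else:
        chunks = [
            dataframe.iloc[start : start + chunksize]
            for start in range(0, len(dataframe), chunksize)
        ]

    for chunk in chunks:
        # Serialize to text, then wrap in a bytes buffer for the upload API.
        csv_text = chunk.to_csv(index=False, header=False)
        chunk_buffer = io.BytesIO(csv_text.encode("utf-8"))
        try:
            client.load_table_from_file(
                chunk_buffer, destination, job_config=job_config
            ).result()
        finally:
            chunk_buffer.close()
```

This is why the docstring matters: older google-cloud-bigquery releases only accept CSV via load_table_from_file, so the DataFrame must be serialized by hand rather than handed to load_table_from_dataframe.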
2 changes: 2 additions & 0 deletions pandas_gbq/schema.py
@@ -101,6 +101,8 @@ def generate_bq_schema(dataframe, default_type="STRING"):
"S": "STRING",
"U": "STRING",
"M": "TIMESTAMP",
# TODO: Disambiguate TIMESTAMP from DATETIME based on whether the
# column is localized.
}

fields = []
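To make the mapping concrete, here is a hedged sketch of how a dtype-kind table like the one above typically drives schema generation. The surrounding code is abridged, and `generate_bq_schema_sketch` is a stand-in, not pandas-gbq's actual function body:

```python
import pandas

# Mirrors the kind of mapping shown in the diff; "M" (datetime64) currently
# maps to TIMESTAMP even for naive datetimes -- hence the TODO above.
TYPE_MAPPING = {
    "i": "INTEGER",
    "b": "BOOLEAN",
    "f": "FLOAT",
    "O": "STRING",
    "S": "STRING",
    "U": "STRING",
    "M": "TIMESTAMP",
}


def generate_bq_schema_sketch(dataframe, default_type="STRING"):
    """Hypothetical sketch: map each column's numpy dtype kind to a BigQuery type."""
    fields = []
    for column_name, dtype in dataframe.dtypes.items():
        fields.append(
            {"name": column_name, "type": TYPE_MAPPING.get(dtype.kind, default_type)}
        )
    return {"fields": fields}


df = pandas.DataFrame(
    {"a": [1], "b": [True], "ts": pandas.to_datetime(["2021-12-21"])}
)
print(generate_bq_schema_sketch(df))
# {'fields': [{'name': 'a', 'type': 'INTEGER'},
#             {'name': 'b', 'type': 'BOOLEAN'},
#             {'name': 'ts', 'type': 'TIMESTAMP'}]}
```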
56 changes: 56 additions & 0 deletions tests/unit/test_load.py
@@ -95,6 +95,62 @@ def test_encode_chunks_with_chunksize_none():
assert len(chunk.index) == 6


def test_load_csv_from_file_generates_schema(mock_bigquery_client):
import google.cloud.bigquery

df = pandas.DataFrame(
{
"int_col": [1, 2, 3],
"bool_col": [True, False, True],
"float_col": [0.0, 1.25, -2.75],
"string_col": ["a", "b", "c"],
"datetime_col": pandas.Series(
[
"2021-12-21 13:28:40.123789",
"2000-01-01 11:10:09",
"2040-10-31 23:59:59.999999",
],
dtype="datetime64[ns]",
),
"timestamp_col": pandas.Series(
[
"2021-12-21 13:28:40.123789",
"2000-01-01 11:10:09",
"2040-10-31 23:59:59.999999",
],
dtype="datetime64[ns]",
).dt.tz_localize(datetime.timezone.utc),
}
)
destination = google.cloud.bigquery.TableReference.from_string(
"my-project.my_dataset.my_table"
)

_ = list(
load.load_csv_from_file(mock_bigquery_client, df, destination, None, None, None)
)

mock_load = mock_bigquery_client.load_table_from_file
assert mock_load.called
_, kwargs = mock_load.call_args
assert "job_config" in kwargs
sent_schema = kwargs["job_config"].schema
assert sent_schema[0].name == "int_col"
assert sent_schema[0].field_type == "INTEGER"
assert sent_schema[1].name == "bool_col"
assert sent_schema[1].field_type == "BOOLEAN"
assert sent_schema[2].name == "float_col"
assert sent_schema[2].field_type == "FLOAT"
assert sent_schema[3].name == "string_col"
assert sent_schema[3].field_type == "STRING"
    # TODO: Disambiguate TIMESTAMP from DATETIME based on whether the
    # column is localized.
assert sent_schema[4].name == "datetime_col"
assert sent_schema[4].field_type == "TIMESTAMP"
assert sent_schema[5].name == "timestamp_col"
assert sent_schema[5].field_type == "TIMESTAMP"


@pytest.mark.parametrize(
["bigquery_has_from_dataframe_with_csv", "api_method"],
[(True, "load_parquet"), (True, "load_csv"), (False, "load_csv")],
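The new test depends on a `mock_bigquery_client` fixture that is not part of this diff. A plausible minimal version (an assumption — the real fixture lives in the project's test conftest and may stub more behavior) could look like:

```python
# Hypothetical conftest.py sketch; the project's real fixture may differ.
from unittest import mock

import google.cloud.bigquery
import pytest


@pytest.fixture
def mock_bigquery_client():
    # autospec keeps the mock faithful to the real Client surface, so
    # load_table_from_file records call_args for the schema assertions above.
    return mock.create_autospec(google.cloud.bigquery.Client, instance=True)
```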
