fix: allow extreme DATE values such as datetime.date(1, 1, 1) in `load_gbq` (#442)


Fixes #441 
Towards #365 
🦕
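
A minimal sketch of the round trip this change unblocks (the destination table and project IDs below are placeholders, and running it requires BigQuery credentials):

import datetime

import pandas
import pandas_gbq

df = pandas.DataFrame(
    {
        "row_num": [1, 2, 3],
        "date_col": [
            datetime.date(1, 1, 1),
            datetime.date(1970, 1, 1),
            datetime.date(9999, 12, 31),
        ],
    }
)
# Before this fix, the cast to the db-dtypes date dtype raised for dates
# outside the pandas Timestamp range; now such values load as-is.
pandas_gbq.to_gbq(
    df,
    "my_dataset.my_table",  # placeholder destination
    project_id="my-project",  # placeholder project
    table_schema=[
        {"name": "row_num", "type": "INTEGER"},
        {"name": "date_col", "type": "DATE"},
    ],
)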
tswast authored Dec 7, 2021
1 parent 928e47b commit e13abaf
Showing 5 changed files with 61 additions and 12 deletions.
2 changes: 1 addition & 1 deletion ci/requirements-3.7-0.24.2.conda
@@ -1,6 +1,6 @@
 codecov
 coverage
-db-dtypes==0.3.0
+db-dtypes==0.3.1
 fastavro
 flake8
 numpy==1.16.6
13 changes: 7 additions & 6 deletions pandas_gbq/load.py
@@ -90,12 +90,13 @@ def cast_dataframe_for_parquet(
             # Use extension dtype first so that it uses the correct equality operator.
             and db_dtypes.DateDtype() != dataframe[column_name].dtype
         ):
-            # Construct converted column manually, because I can't use
-            # .astype() with DateDtype. With .astype(), I get the error:
-            #
-            # TypeError: Cannot interpret '<db_dtypes.DateDtype ...>' as a data type
-            cast_column = pandas.Series(
-                dataframe[column_name], dtype=db_dtypes.DateDtype()
+            cast_column = dataframe[column_name].astype(
+                dtype=db_dtypes.DateDtype(),
+                # Return the original column if there was an error converting
+                # to the dtype, such as if there is a date outside the
+                # supported range.
+                # https://github.com/googleapis/python-bigquery-pandas/issues/441
+                errors="ignore",
             )
         elif column_type in {"NUMERIC", "DECIMAL", "BIGNUMERIC", "BIGDECIMAL"}:
             cast_column = dataframe[column_name].map(decimal.Decimal)
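For reference, a standalone sketch of the errors="ignore" fallback the new code relies on: pandas' Series.astype hands back the original object column when the cast raises, as it does for dates outside the pandas Timestamp range.

import datetime

import db_dtypes
import pandas

dates = pandas.Series(
    [datetime.date(1, 1, 1), datetime.date(1970, 1, 1)], dtype="object"
)
# The year-1 date is outside the supported range, so the cast fails and the
# original object column is returned unchanged.
print(dates.astype(db_dtypes.DateDtype(), errors="ignore").dtype)  # object

in_range = pandas.Series([datetime.date(1970, 1, 1)], dtype="object")
# All values are in range, so the cast succeeds.
print(in_range.astype(db_dtypes.DateDtype(), errors="ignore").dtype)  # dbdate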
8 changes: 4 additions & 4 deletions setup.py
@@ -23,16 +23,16 @@
 release_status = "Development Status :: 4 - Beta"
 dependencies = [
     "setuptools",
-    "db-dtypes >=0.3.0,<2.0.0",
-    "numpy>=1.16.6",
-    "pandas>=0.24.2",
+    "db-dtypes >=0.3.1,<2.0.0",
+    "numpy >=1.16.6",
+    "pandas >=0.24.2",
     "pyarrow >=3.0.0, <7.0dev",
     "pydata-google-auth",
     "google-auth",
     "google-auth-oauthlib",
     # 2.4.* has a bug where waiting for the query can hang indefinitely.
     # https://github.com/pydata/pandas-gbq/issues/343
-    "google-cloud-bigquery[bqstorage,pandas]>=1.11.1,<3.0.0dev,!=2.4.*",
+    "google-cloud-bigquery[bqstorage,pandas] >=1.11.1,<3.0.0dev,!=2.4.*",
 ]
 extras = {
     "tqdm": "tqdm>=4.23.0",
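A quick sanity check that an environment satisfies the new db-dtypes floor, sketched with pkg_resources (available via the declared setuptools dependency):

import pkg_resources

# Assert the installed db-dtypes meets the ">=0.3.1" minimum from setup.py.
dist = pkg_resources.get_distribution("db-dtypes")
assert dist.parsed_version >= pkg_resources.parse_version("0.3.1"), dist.version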
2 changes: 1 addition & 1 deletion testing/constraints-3.7.txt
@@ -5,7 +5,7 @@
 #
 # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
 # Then this file should have foo==1.14.0
-db-dtypes==0.3.0
+db-dtypes==0.3.1
 google-auth==1.4.1
 google-auth-oauthlib==0.0.1
 google-cloud-bigquery==1.11.1
48 changes: 48 additions & 0 deletions tests/system/test_to_gbq.py
@@ -188,6 +188,54 @@ def test_series_round_trip(
         {"name": "num_col", "type": "NUMERIC"},
     ],
 ),
+    pytest.param(
+        *DataFrameRoundTripTestCase(
+            input_df=pandas.DataFrame(
+                {
+                    "row_num": [1, 2, 3],
+                    # DATE values outside the pandas range for timestamp
+                    # aren't supported by the db-dtypes package.
+                    # https://github.com/googleapis/python-bigquery-pandas/issues/441
+                    "date_col": [
+                        datetime.date(1, 1, 1),
+                        datetime.date(1970, 1, 1),
+                        datetime.date(9999, 12, 31),
+                    ],
+                    # TODO: DATETIME/TIMESTAMP values outside of the range for
+                    # pandas timestamp require `date_as_object` parameter in
+                    # google-cloud-bigquery versions 1.x and 2.x.
+                    # https://github.com/googleapis/python-bigquery-pandas/issues/365
+                    # "datetime_col": [
+                    #     datetime.datetime(1, 1, 1),
+                    #     datetime.datetime(1970, 1, 1),
+                    #     datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
+                    # ],
+                    # "timestamp_col": [
+                    #     datetime.datetime(1, 1, 1, tzinfo=datetime.timezone.utc),
+                    #     datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc),
+                    #     datetime.datetime(
+                    #         9999,
+                    #         12,
+                    #         31,
+                    #         23,
+                    #         59,
+                    #         59,
+                    #         999999,
+                    #         tzinfo=datetime.timezone.utc,
+                    #     ),
+                    # ],
+                },
+                columns=["row_num", "date_col", "datetime_col", "timestamp_col"],
+            ),
+            table_schema=[
+                {"name": "row_num", "type": "INTEGER"},
+                {"name": "date_col", "type": "DATE"},
+                {"name": "datetime_col", "type": "DATETIME"},
+                {"name": "timestamp_col", "type": "TIMESTAMP"},
+            ],
+        ),
+        id="issue365-extreme-datetimes",
+    ),
 ]


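As a local approximation of what the new test case exercises, extreme dates also survive the parquet leg of the load path; a sketch (the file name is a placeholder, and pyarrow is assumed as the parquet engine):

import datetime

import pandas

df = pandas.DataFrame(
    {
        "row_num": [1, 2, 3],
        "date_col": [
            datetime.date(1, 1, 1),
            datetime.date(1970, 1, 1),
            datetime.date(9999, 12, 31),
        ],
    }
)
df.to_parquet("extreme_dates.parquet")  # stored as 32-bit dates, so year 1 fits
round_trip = pandas.read_parquet("extreme_dates.parquet")
# pyarrow returns DATE columns as datetime.date objects by default.
print(round_trip["date_col"].tolist())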
