Skip to content

Commit

Permalink
fix: remove pytz dependency and require pyarrow>=3.0.0 (#875)
Browse files Browse the repository at this point in the history
* fix: remove pytz dependency

* 🦉 Updates from OwlBot

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* fix(deps): require pyarrow>=3.0.0

* remove version check for pyarrow

* require pyarrow 3.0 in pandas extra

* remove _BIGNUMERIC_SUPPORT references from tests

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Co-authored-by: Dina Graves Portman <dinagraves@google.com>
Co-authored-by: Tim Swast <swast@google.com>
  • Loading branch information
4 people authored Aug 13, 2021
1 parent cd21df1 commit 2cb3563
Show file tree
Hide file tree
Showing 12 changed files with 78 additions and 123 deletions.
5 changes: 3 additions & 2 deletions docs/snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,6 @@ def test_update_table_expiration(client, to_delete):

# [START bigquery_update_table_expiration]
import datetime
import pytz

# from google.cloud import bigquery
# client = bigquery.Client()
Expand All @@ -375,7 +374,9 @@ def test_update_table_expiration(client, to_delete):
assert table.expires is None

# set table to expire 5 days from now
expiration = datetime.datetime.now(pytz.utc) + datetime.timedelta(days=5)
expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(
days=5
)
table.expires = expiration
table = client.update_table(table, ["expires"]) # API request

Expand Down
17 changes: 4 additions & 13 deletions google/cloud/bigquery/_pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
import queue
import warnings

from packaging import version

try:
import pandas
except ImportError: # pragma: NO COVER
Expand Down Expand Up @@ -110,6 +108,7 @@ def pyarrow_timestamp():
# This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py
# When modifying it be sure to update it there as well.
BQ_TO_ARROW_SCALARS = {
"BIGNUMERIC": pyarrow_bignumeric,
"BOOL": pyarrow.bool_,
"BOOLEAN": pyarrow.bool_,
"BYTES": pyarrow.binary,
Expand Down Expand Up @@ -146,23 +145,15 @@ def pyarrow_timestamp():
pyarrow.date64().id: "DATETIME", # because millisecond resolution
pyarrow.binary().id: "BYTES",
pyarrow.string().id: "STRING", # also alias for pyarrow.utf8()
# The exact scale and precision don't matter, see below.
pyarrow.decimal128(38, scale=9).id: "NUMERIC",
}

if version.parse(pyarrow.__version__) >= version.parse("3.0.0"):
BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric
# The exact decimal's scale and precision are not important, as only
# the type ID matters, and it's the same for all decimal256 instances.
ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC"
_BIGNUMERIC_SUPPORT = True
else:
_BIGNUMERIC_SUPPORT = False
pyarrow.decimal128(38, scale=9).id: "NUMERIC",
pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC",
}

else: # pragma: NO COVER
BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER
ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER
_BIGNUMERIC_SUPPORT = False # pragma: NO COVER


def bq_to_arrow_struct_data_type(field):
Expand Down
3 changes: 1 addition & 2 deletions google/cloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import datetime
import functools
import operator
import pytz
import typing
from typing import Any, Dict, Iterable, Iterator, Optional, Tuple
import warnings
Expand Down Expand Up @@ -1969,7 +1968,7 @@ def to_dataframe(
# Pandas, we set the timestamp_as_object parameter to True, if necessary.
types_to_check = {
pyarrow.timestamp("us"),
pyarrow.timestamp("us", tz=pytz.UTC),
pyarrow.timestamp("us", tz=datetime.timezone.utc),
}

for column in record_batch:
Expand Down
3 changes: 1 addition & 2 deletions samples/client_query_w_timestamp_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ def client_query_w_timestamp_params():
# [START bigquery_query_params_timestamps]
import datetime

import pytz
from google.cloud import bigquery

# Construct a BigQuery client object.
Expand All @@ -30,7 +29,7 @@ def client_query_w_timestamp_params():
bigquery.ScalarQueryParameter(
"ts_value",
"TIMESTAMP",
datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC),
datetime.datetime(2016, 12, 7, 8, 0, tzinfo=datetime.timezone.utc),
)
]
)
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@
# grpc.Channel.close() method isn't added until 1.32.0.
# https://github.com/grpc/grpc/pull/15254
"grpcio >= 1.38.1, < 2.0dev",
"pyarrow >= 1.0.0, < 6.0dev",
"pyarrow >= 3.0.0, < 6.0dev",
],
"pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 6.0dev"],
"pandas": ["pandas>=0.23.0", "pyarrow >= 3.0.0, < 6.0dev"],
"bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"],
"tqdm": ["tqdm >= 4.7.4, <5.0.0dev"],
"opentelemetry": [
Expand Down
2 changes: 1 addition & 1 deletion testing/constraints-3.6.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ opentelemetry-sdk==0.11b0
pandas==0.23.0
proto-plus==1.10.0
protobuf==3.12.0
pyarrow==1.0.0
pyarrow==3.0.0
requests==2.18.0
six==1.13.0
tqdm==4.7.4
14 changes: 5 additions & 9 deletions tests/system/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
import psutil
import pytest

from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT
from . import helpers

try:
Expand Down Expand Up @@ -1972,15 +1971,12 @@ def test_query_w_query_params(self):
"expected": {"friends": [phred_name, bharney_name]},
"query_parameters": [with_friends_param],
},
{
"sql": "SELECT @bignum_param",
"expected": bignum,
"query_parameters": [bignum_param],
},
]
if _BIGNUMERIC_SUPPORT:
examples.append(
{
"sql": "SELECT @bignum_param",
"expected": bignum,
"query_parameters": [bignum_param],
}
)

for example in examples:
jconfig = QueryJobConfig()
Expand Down
42 changes: 18 additions & 24 deletions tests/system/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,8 @@
import google.api_core.retry
import pkg_resources
import pytest
import pytz

from google.cloud import bigquery
from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT
from . import helpers


Expand Down Expand Up @@ -64,7 +62,7 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i
datetime.datetime(2012, 3, 14, 15, 16),
],
dtype="datetime64[ns]",
).dt.tz_localize(pytz.utc),
).dt.tz_localize(datetime.timezone.utc),
),
(
"dt_col",
Expand Down Expand Up @@ -189,12 +187,11 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id):
bigquery.SchemaField("geo_col", "GEOGRAPHY"),
bigquery.SchemaField("int_col", "INTEGER"),
bigquery.SchemaField("num_col", "NUMERIC"),
bigquery.SchemaField("bignum_col", "BIGNUMERIC"),
bigquery.SchemaField("str_col", "STRING"),
bigquery.SchemaField("time_col", "TIME"),
bigquery.SchemaField("ts_col", "TIMESTAMP"),
)
if _BIGNUMERIC_SUPPORT:
scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),)

table_schema = scalars_schema + (
# TODO: Array columns can't be read due to NULLABLE versus REPEATED
Expand All @@ -216,12 +213,11 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id):
("geo_col", nulls),
("int_col", nulls),
("num_col", nulls),
("bignum_col", nulls),
("str_col", nulls),
("time_col", nulls),
("ts_col", nulls),
]
if _BIGNUMERIC_SUPPORT:
df_data.append(("bignum_col", nulls))
df_data = collections.OrderedDict(df_data)
dataframe = pandas.DataFrame(df_data, columns=df_data.keys())

Expand Down Expand Up @@ -297,12 +293,11 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
bigquery.SchemaField("geo_col", "GEOGRAPHY"),
bigquery.SchemaField("int_col", "INTEGER"),
bigquery.SchemaField("num_col", "NUMERIC"),
bigquery.SchemaField("bignum_col", "BIGNUMERIC"),
bigquery.SchemaField("str_col", "STRING"),
bigquery.SchemaField("time_col", "TIME"),
bigquery.SchemaField("ts_col", "TIMESTAMP"),
)
if _BIGNUMERIC_SUPPORT:
scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),)

table_schema = scalars_schema + (
# TODO: Array columns can't be read due to NULLABLE versus REPEATED
Expand Down Expand Up @@ -340,6 +335,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
decimal.Decimal("99999999999999999999999999999.999999999"),
],
),
(
"bignum_col",
[
decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)),
None,
decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
],
),
("str_col", ["abc", None, "def"]),
(
"time_col",
Expand All @@ -348,23 +351,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
(
"ts_col",
[
datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
None,
datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc),
datetime.datetime(
9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
),
],
),
]
if _BIGNUMERIC_SUPPORT:
df_data.append(
(
"bignum_col",
[
decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)),
None,
decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
],
)
)
df_data = collections.OrderedDict(df_data)
dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys())

Expand Down Expand Up @@ -484,10 +478,10 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(
(
"ts_col",
[
datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
None,
datetime.datetime(
9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc
9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
),
],
),
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/job/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,11 +295,11 @@ def test_user_email(self):
@staticmethod
def _datetime_and_millis():
import datetime
import pytz
from google.cloud._helpers import _millis

now = datetime.datetime.utcnow().replace(
microsecond=123000, tzinfo=pytz.UTC # stats timestamps have ms precision
microsecond=123000,
tzinfo=datetime.timezone.utc, # stats timestamps have ms precision
)
return now, _millis(now)

Expand Down
Loading

0 comments on commit 2cb3563

Please sign in to comment.