googleapis · tswast · Jan 5, 2022 · Dec 6, 2021 · Dec 6, 2021 · Dec 6, 2021
@@ -4,7 +4,8 @@ db-dtypes==0.3.1
 fastavro
 flake8
 numpy==1.16.6
-google-cloud-bigquery==1.11.1
+google-cloud-bigquery==1.27.2
+google-cloud-bigquery-storage==1.1.0
 pyarrow==3.0.0
 pydata-google-auth
 pytest

@@ -8,7 +8,10 @@
 BIGQUERY_MINIMUM_VERSION = "1.11.1"
 BIGQUERY_CLIENT_INFO_VERSION = "1.12.0"
 BIGQUERY_BQSTORAGE_VERSION = "1.24.0"
+BIGQUERY_ACCURATE_TIMESTAMP_VERSION = "2.6.0"
 BIGQUERY_FROM_DATAFRAME_CSV_VERSION = "2.6.0"
+BIGQUERY_SUPPORTS_BIGNUMERIC_VERSION = "2.10.0"
+BIGQUERY_NO_DATE_AS_OBJECT_VERSION = "3.0.0dev"
 PANDAS_VERBOSITY_DEPRECATION_VERSION = "0.23.0"
 PANDAS_BOOLEAN_DTYPE_VERSION = "1.0.0"
 PANDAS_PARQUET_LOSSLESS_TIMESTAMP_VERSION = "1.1.0"
@@ -42,6 +45,13 @@ def bigquery_installed_version(self):
 
         return self._bigquery_installed_version
 
+    @property
+    def bigquery_has_accurate_timestamp(self):
+        import pkg_resources
+
+        min_version = pkg_resources.parse_version(BIGQUERY_ACCURATE_TIMESTAMP_VERSION)
+        return self.bigquery_installed_version >= min_version
+
     @property
     def bigquery_has_client_info(self):
         import pkg_resources
@@ -51,6 +61,13 @@ def bigquery_has_client_info(self):
         )
         return self.bigquery_installed_version >= bigquery_client_info_version
 
+    @property
+    def bigquery_has_bignumeric(self):
+        import pkg_resources
+
+        min_version = pkg_resources.parse_version(BIGQUERY_SUPPORTS_BIGNUMERIC_VERSION)
+        return self.bigquery_installed_version >= min_version
+
     @property
     def bigquery_has_bqstorage(self):
         import pkg_resources
@@ -69,6 +86,13 @@ def bigquery_has_from_dataframe_with_csv(self):
         )
         return self.bigquery_installed_version >= bigquery_from_dataframe_version
 
+    @property
+    def bigquery_needs_date_as_object(self):
+        import pkg_resources
+
+        max_version = pkg_resources.parse_version(BIGQUERY_NO_DATE_AS_OBJECT_VERSION)
+        return self.bigquery_installed_version < max_version
+
     @property
     def pandas_installed_version(self):
         import pandas

@@ -2,13 +2,13 @@
 # Use of this source code is governed by a BSD-style
 # license that can be found in the LICENSE file.
 
+from datetime import datetime
 import logging
 import re
 import time
-import warnings
-from datetime import datetime
 import typing
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional, Sequence, Union
+import warnings
 
 import numpy as np
 
@@ -37,21 +37,25 @@
 import pandas_gbq.schema
 import pandas_gbq.timestamp
 
-
-logger = logging.getLogger(__name__)
-
 try:
     import tqdm  # noqa
 except ImportError:
     tqdm = None
 
+logger = logging.getLogger(__name__)
+
 
 def _test_google_api_imports():
     try:
         import pkg_resources  # noqa
     except ImportError as ex:
         raise ImportError("pandas-gbq requires setuptools") from ex
 
+    try:
+        import db_dtypes  # noqa
+    except ImportError as ex:
+        raise ImportError("pandas-gbq requires db-dtypes") from ex
+
     try:
         import pydata_google_auth  # noqa
     except ImportError as ex:
@@ -546,6 +550,8 @@ def _download_results(
         to_dataframe_kwargs = {}
         if FEATURES.bigquery_has_bqstorage:
             to_dataframe_kwargs["create_bqstorage_client"] = create_bqstorage_client
+        if FEATURES.bigquery_needs_date_as_object:
+            to_dataframe_kwargs["date_as_object"] = True
 
         try:
             schema_fields = [field.to_api_repr() for field in rows_iter.schema]
@@ -559,11 +565,7 @@ def _download_results(
         except self.http_error as ex:
             self.process_http_error(ex)
 
-        if df.empty:
-            df = _cast_empty_df_dtypes(schema_fields, df)
-
-        # Ensure any TIMESTAMP columns are tz-aware.
-        df = pandas_gbq.timestamp.localize_df(df, schema_fields)
+        df = _finalize_dtypes(df, schema_fields)
 
         logger.debug("Got {} rows.\n".format(rows_iter.total_rows))
         return df
@@ -617,23 +619,18 @@ def _bqschema_to_nullsafe_dtypes(schema_fields):
     See: http://pandas.pydata.org/pandas-docs/dev/missing_data.html
     #missing-data-casting-rules-and-indexing
     """
+    import db_dtypes
+
     # If you update this mapping, also update the table at
     # `docs/reading.rst`.
     dtype_map = {
-        "DATE": "datetime64[ns]",
-        "DATETIME": "datetime64[ns]",
         "FLOAT": np.dtype(float),
-        "GEOMETRY": "object",
         "INTEGER": "Int64",
-        "RECORD": "object",
-        "STRING": "object",
-        # datetime.time objects cannot be case to datetime64.
-        # https://github.com/pydata/pandas-gbq/issues/328
-        "TIME": "object",
-        # pandas doesn't support timezone-aware dtype in DataFrame/Series
-        # constructors. It's more idiomatic to localize after construction.
-        # https://github.com/pandas-dev/pandas/issues/25843
-        "TIMESTAMP": "datetime64[ns]",
+        "TIME": db_dtypes.TimeDtype(),
+        # Note: Other types such as 'datetime64[ns]' and db_dtypes.DateDtype()
+        # are not included because the pandas range does not align with the
+        # BigQuery range. We need to attempt a conversion to those types and
+        # fall back to 'object' when there are out-of-range values.
     }
 
     # Amend dtype_map with newer extension types if pandas version allows.
@@ -656,28 +653,43 @@ def _bqschema_to_nullsafe_dtypes(schema_fields):
     return dtypes
 
 
-def _cast_empty_df_dtypes(schema_fields, df):
-    """Cast any columns in an empty dataframe to correct type.
+def _finalize_dtypes(
+    df: "pandas.DataFrame", schema_fields: Sequence[Dict[str, Any]]
+) -> "pandas.DataFrame":
+    """
+    Attempt to change the dtypes of those columns that don't map exactly.
 
-    In an empty dataframe, pandas cannot choose a dtype unless one is
-    explicitly provided. The _bqschema_to_nullsafe_dtypes() function only
-    provides dtypes when the dtype safely handles null values. This means
-    that empty int64 and boolean columns are incorrectly classified as
-    ``object``.
+    For example db_dtypes.DateDtype() and datetime64[ns] cannot represent
+    0001-01-01, but they can represent dates within a couple hundred years of
+    1970. See:
+    https://github.com/googleapis/python-bigquery-pandas/issues/365
     """
-    if not df.empty:
-        raise ValueError("DataFrame must be empty in order to cast non-nullsafe dtypes")
+    import db_dtypes
+    import pandas.api.types
 
-    dtype_map = {"BOOLEAN": bool, "INTEGER": np.int64}
+    # If you update this mapping, also update the table at
+    # `docs/reading.rst`.
+    dtype_map = {
+        "DATE": db_dtypes.DateDtype(),
+        "DATETIME": "datetime64[ns]",
+        "TIMESTAMP": "datetime64[ns]",
+    }
 
     for field in schema_fields:
-        column = str(field["name"])
+        # This function doesn't modify ARRAY/REPEATED columns.
         if field["mode"].upper() == "REPEATED":
             continue
 
+        name = str(field["name"])
         dtype = dtype_map.get(field["type"].upper())
-        if dtype:
-            df[column] = df[column].astype(dtype)
+
+        # Avoid deprecated conversion to timezone-naive dtype by only casting
+        # object dtypes.
+        if dtype and pandas.api.types.is_object_dtype(df[name]):
+            df[name] = df[name].astype(dtype, errors="ignore")
+
+    # Ensure any TIMESTAMP columns are tz-aware.
+    df = pandas_gbq.timestamp.localize_df(df, schema_fields)
 
     return df
 

@@ -30,11 +30,6 @@ def localize_df(df, schema_fields):
     pandas.DataFrame
         DataFrame with localized TIMESTAMP columns.
     """
-    if len(df.index) == 0:
-        # If there are no rows, there is nothing to do.
-        # Fix for https://github.com/pydata/pandas-gbq/issues/299
-        return df
-
     for field in schema_fields:
         column = str(field["name"])
         if "mode" in field and field["mode"].upper() == "REPEATED":

@@ -28,12 +28,19 @@
     "pandas >=0.24.2",
     "pyarrow >=3.0.0, <7.0dev",
     "pydata-google-auth",
-    "google-api-core >=1.14.0",
-    "google-auth >=1.4.1",
+    # Note: google-api-core and google-auth are also included via transitive
+    # dependency on google-cloud-bigquery, but this library also uses them
+    # directly.
+    "google-api-core >=1.21.0",
+    "google-auth >=1.18.0",
     "google-auth-oauthlib >=0.0.1",
-    # 2.4.* has a bug where waiting for the query can hang indefinitely.
-    # https://github.com/pydata/pandas-gbq/issues/343
-    "google-cloud-bigquery[bqstorage,pandas] >=1.11.1,<4.0.0dev,!=2.4.*",
+    # Require >=1.27.2, which has a fix for out-of-bounds timestamps.  See:
+    # https://github.com/googleapis/python-bigquery/pull/209 and
+    # https://github.com/googleapis/python-bigquery-pandas/issues/365
+    # Exclude 2.4.* because it has a bug where waiting for the query can hang
+    # indefinitely. https://github.com/pydata/pandas-gbq/issues/343
+    "google-cloud-bigquery >=1.27.2,<4.0.0dev,!=2.4.*",
+    "google-cloud-bigquery-storage >=1.1.0,<3.0.0dev",
 ]
 extras = {
     "tqdm": "tqdm>=4.23.0",

@@ -6,10 +6,10 @@
 # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
 # Then this file should have foo==1.14.0
 db-dtypes==0.3.1
-google-api-core==1.14.0
-google-auth==1.4.1
+google-api-core==1.21.0
+google-auth==1.18.0
 google-auth-oauthlib==0.0.1
-google-cloud-bigquery==1.11.1
+google-cloud-bigquery==1.27.2
 google-cloud-bigquery-storage==1.1.0
 numpy==1.16.6
 pandas==0.24.2