From 9a9d3fda24d41457cb0ab1c803388e096ab6afcc Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 6 Dec 2021 16:22:01 -0600 Subject: [PATCH 01/37] feat: accepts a table ID, which downloads the table without a query --- pandas_gbq/gbq.py | 38 +++++++++++++++++-------------------- tests/system/conftest.py | 19 +++++++++++++++++++ tests/system/test_to_gbq.py | 19 +++++++------------ 3 files changed, 43 insertions(+), 33 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 87c2327c..714c0995 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -374,7 +374,9 @@ def process_http_error(ex): raise GenericGBQException("Reason: {0}".format(ex)) - def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs): + def run_query( + self, query_or_table, max_results=None, progress_bar_type=None, **kwargs + ): from concurrent.futures import TimeoutError from google.auth.exceptions import RefreshError @@ -391,20 +393,20 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs): job_config.update(config) if "query" in config and "query" in config["query"]: - if query is not None: + if query_or_table is not None: raise ValueError( "Query statement can't be specified " "inside config while it is specified " "as parameter" ) - query = config["query"].pop("query") + query_or_table = config["query"].pop("query") self._start_timer() try: logger.debug("Requesting query... ") query_reply = self.client.query( - query, + query_or_table, job_config=bigquery.QueryJobConfig.from_api_repr(job_config), location=self.location, project=self.project_id, @@ -639,7 +641,7 @@ def _cast_empty_df_dtypes(schema_fields, df): def read_gbq( - query, + query_or_table, project_id=None, index_col=None, col_order=None, @@ -663,17 +665,18 @@ def read_gbq( This method uses the Google Cloud client library to make requests to Google BigQuery, documented `here - `__. + `__. See the :ref:`How to authenticate with Google BigQuery ` guide for authentication instructions. Parameters ---------- - query : str - SQL-Like Query to return data values. + query_or_table : str + SQL query to return data values. If the string is a table ID, fetch the + rows directly from the table without running a query. project_id : str, optional - Google BigQuery Account project ID. Optional when available from + Google Cloud Platform project ID. Optional when available from the environment. index_col : str, optional Name of result column to use for index in results DataFrame. @@ -688,9 +691,9 @@ def read_gbq( when getting user credentials. .. _local webserver flow: - http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server + https://googleapis.dev/python/google-auth-oauthlib/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server .. _console flow: - http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + https://googleapis.dev/python/google-auth-oauthlib/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console .. versionadded:: 0.2.0 dialect : str, default 'standard' @@ -740,13 +743,6 @@ def read_gbq( `__ permission on the project you are billing queries to. 
- **Note:** Due to a `known issue in the ``google-cloud-bigquery`` - package - `__ - (fixed in version 1.11.0), you must write your query results to a - destination table. To do this with ``read_gbq``, supply a - ``configuration`` dictionary. - This feature requires the ``google-cloud-bigquery-storage`` and ``pyarrow`` packages. @@ -830,7 +826,7 @@ def read_gbq( ) final_df = connector.run_query( - query, + query_or_table, configuration=configuration, max_results=max_results, progress_bar_type=progress_bar_type, @@ -884,7 +880,7 @@ def to_gbq( This method uses the Google Cloud client library to make requests to Google BigQuery, documented `here - `__. + `__. See the :ref:`How to authenticate with Google BigQuery ` guide for authentication instructions. @@ -897,7 +893,7 @@ def to_gbq( Name of table to be written, in the form ``dataset.tablename`` or ``project.dataset.tablename``. project_id : str, optional - Google BigQuery Account project ID. Optional when available from + Google Cloud Platform project ID. Optional when available from the environment. chunksize : int, optional Number of rows to be inserted in each chunk from the dataframe. diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 6ac55220..4ba8bf31 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -3,6 +3,7 @@ # license that can be found in the LICENSE file. import os +import functools import pathlib from google.cloud import bigquery @@ -56,6 +57,24 @@ def project(project_id): return project_id +@pytest.fixture +def to_gbq(credentials, project_id): + import pandas_gbq + + return functools.partial( + pandas_gbq.to_gbq, project_id=project_id, credentials=credentials + ) + + +@pytest.fixture +def read_gbq(credentials, project_id): + import pandas_gbq + + return functools.partial( + pandas_gbq.read_gbq, project_id=project_id, credentials=credentials + ) + + @pytest.fixture() def random_dataset_id(bigquery_client: bigquery.Client, project_id: str): dataset_id = prefixer.create_prefix() diff --git a/tests/system/test_to_gbq.py b/tests/system/test_to_gbq.py index 4421f3be..f8d9c7f7 100644 --- a/tests/system/test_to_gbq.py +++ b/tests/system/test_to_gbq.py @@ -5,7 +5,6 @@ import datetime import decimal import collections -import functools import random import db_dtypes @@ -23,12 +22,8 @@ def api_method(request): @pytest.fixture -def method_under_test(credentials, project_id): - import pandas_gbq - - return functools.partial( - pandas_gbq.to_gbq, project_id=project_id, credentials=credentials - ) +def method_under_test(to_gbq): + return to_gbq SeriesRoundTripTestCase = collections.namedtuple( @@ -98,7 +93,7 @@ def method_under_test(credentials, project_id): def test_series_round_trip( method_under_test, random_dataset_id, - bigquery_client, + read_gbq, input_series, api_method, api_methods, @@ -114,7 +109,7 @@ def test_series_round_trip( ) method_under_test(df, table_id, api_method=api_method) - round_trip = bigquery_client.list_rows(table_id).to_dataframe() + round_trip = read_gbq(table_id) round_trip_series = round_trip["test_col"].sort_values().reset_index(drop=True) pandas.testing.assert_series_equal( round_trip_series, input_series, check_exact=True, check_names=False, @@ -196,8 +191,8 @@ def test_series_round_trip( ) def test_dataframe_round_trip_with_table_schema( method_under_test, + read_gbq, random_dataset_id, - bigquery_client, input_df, expected_df, table_schema, @@ -212,8 +207,8 @@ def test_dataframe_round_trip_with_table_schema( method_under_test( input_df, table_id, 
table_schema=table_schema, api_method=api_method ) - round_trip = bigquery_client.list_rows(table_id).to_dataframe( - dtypes=dict(zip(expected_df.columns, expected_df.dtypes)) + round_trip = read_gbq( + table_id, dtypes=dict(zip(expected_df.columns, expected_df.dtypes)), ) round_trip.sort_values("row_num", inplace=True) pandas.testing.assert_frame_equal(expected_df, round_trip) From 6adf2332fa7726532872a68e3283e004f9c3c1db Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 6 Dec 2021 17:12:17 -0600 Subject: [PATCH 02/37] add todo for next steps --- pandas_gbq/gbq.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 714c0995..d2cc38f9 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -506,13 +506,18 @@ def _download_results( to_dataframe_kwargs["create_bqstorage_client"] = create_bqstorage_client try: + # TODO: This is the only difference between table ID and query job. + # But should I refactor for + # https://github.com/googleapis/python-bigquery-pandas/issues/339 + # now? query_job.result() # Get the table schema, so that we can list rows. destination = self.client.get_table(query_job.destination) rows_iter = self.client.list_rows(destination, max_results=max_results) - schema_fields = [field.to_api_repr() for field in rows_iter.schema] conversion_dtypes = _bqschema_to_nullsafe_dtypes(schema_fields) + # ENDTODO: This is the only difference between table ID and + conversion_dtypes.update(user_dtypes) df = rows_iter.to_dataframe( dtypes=conversion_dtypes, From 9b1eb0dc709beccdd59058874d6b9a7339da5864 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 9 Dec 2021 14:28:34 -0600 Subject: [PATCH 03/37] add unit test for table ID read_gbq --- tests/unit/conftest.py | 17 ++++++++++++++--- tests/unit/test_gbq.py | 19 ++++++++++++++++++- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index cfa1e819..513df4b9 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -26,18 +26,29 @@ def mock_bigquery_client(monkeypatch): # Constructor returns the mock itself, so this mock can be treated as the # constructor or the instance. mock_client.return_value = mock_client - mock_schema = [google.cloud.bigquery.SchemaField("_f0", "INTEGER")] - # Mock out SELECT 1 query results. + mock_query = mock.create_autospec(google.cloud.bigquery.QueryJob) mock_query.job_id = "some-random-id" mock_query.state = "DONE" mock_rows = mock.create_autospec(google.cloud.bigquery.table.RowIterator) mock_rows.total_rows = 1 - mock_rows.schema = mock_schema + mock_rows.__iter__.return_value = [(1,)] mock_query.result.return_value = mock_rows + mock_client.list_rows.return_value = mock_rows mock_client.query.return_value = mock_query # Mock table creation. monkeypatch.setattr(google.cloud.bigquery, "Client", mock_client) mock_client.reset_mock() + + # Mock out SELECT 1 query results. 
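+    # Assumption, for clarity: the PropertyMock below uses a side_effect so
+    # the schema is recomputed on every access, based on the most recently
+    # issued query string.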
+ def generate_schema(): + query = mock_client.query.call_args[0][0] + if query == "SELECT 1 AS int_col": + return [google.cloud.bigquery.SchemaField("int_col", "INTEGER")] + else: + return [google.cloud.bigquery.SchemaField("_f0", "INTEGER")] + + type(mock_rows).schema = mock.PropertyMock(side_effect=generate_schema) + return mock_client diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 8784a98b..bc12c47c 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -292,9 +292,10 @@ def test_read_gbq_with_no_project_id_given_should_fail(monkeypatch): gbq.read_gbq("SELECT 1", dialect="standard") -def test_read_gbq_with_inferred_project_id(monkeypatch): +def test_read_gbq_with_inferred_project_id(mock_bigquery_client): df = gbq.read_gbq("SELECT 1", dialect="standard") assert df is not None + mock_bigquery_client.query.assert_called_once() def test_read_gbq_with_inferred_project_id_from_service_account_credentials( @@ -505,3 +506,19 @@ def test_read_gbq_calls_tqdm(mock_bigquery_client, mock_service_account_credenti _, to_dataframe_kwargs = mock_list_rows.to_dataframe.call_args assert to_dataframe_kwargs["progress_bar_type"] == "foobar" + + +def test_read_gbq_bypasses_query_with_table_id( + mock_bigquery_client, mock_service_account_credentials +): + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "my-project.my_dataset.read_gbq_table", + credentials=mock_service_account_credentials, + ) + assert df is not None + + mock_bigquery_client.query.assert_not_called() + mock_bigquery_client.list_rows.assert_called_with( + "my-project.my_dataset.read_gbq_table" + ) From ec9ddaff46e1911e24fbba4cfc66e743515dde63 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 9 Dec 2021 15:28:44 -0600 Subject: [PATCH 04/37] add helper for is_query --- pandas_gbq/gbq.py | 5 +++++ tests/unit/test_gbq.py | 13 +++++++++++++ 2 files changed, 18 insertions(+) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index bba98f57..07fc6852 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -3,6 +3,7 @@ # license that can be found in the LICENSE file. 
import logging +import re import time import warnings from datetime import datetime @@ -64,6 +65,10 @@ def _test_google_api_imports(): raise ImportError("pandas-gbq requires google-cloud-bigquery") from ex +def _is_query(query_or_table: str) -> bool: + return re.search(r"\s", query_or_table.strip(), re.MULTILINE) is not None + + class DatasetCreationError(ValueError): """ Raised when the create dataset method fails diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index bc12c47c..496486ef 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -82,6 +82,19 @@ def test__bqschema_to_nullsafe_dtypes(type_, expected): assert result == {"x": expected} +@pytest.mark.parametrize( + ["query_or_table", "expected"], + [ + ("SELECT 1", True), + ("dataset.table", False), + ("project-id.dataset.table", False), + ], +) +def test__is_query(query_or_table, expected): + result = gbq._is_query(query_or_table) + assert result == expected + + def test_GbqConnector_get_client_w_old_bq(monkeypatch, mock_bigquery_client): gbq._test_google_api_imports() connector = _make_connector() From 9cc7c74c3d2f76de9ba1beb6fd15156101717be6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 15:07:44 -0600 Subject: [PATCH 05/37] implement read_gbq with table id --- pandas_gbq/gbq.py | 85 ++++++++++++++++++++++++++++-------------- tests/unit/conftest.py | 8 +++- tests/unit/test_gbq.py | 37 +++++++++++++++++- 3 files changed, 99 insertions(+), 31 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 07fc6852..6a8b6788 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -379,9 +379,26 @@ def process_http_error(ex): raise GenericGBQException("Reason: {0}".format(ex)) - def run_query( - self, query_or_table, max_results=None, progress_bar_type=None, **kwargs + def download_table( + self, table_id, max_results=None, progress_bar_type=None, dtypes=None ): + self._start_timer() + + try: + # Get the table schema, so that we can list rows. + destination = self.client.get_table(table_id) + rows_iter = self.client.list_rows(destination, max_results=max_results) + except self.http_error as ex: + self.process_http_error(ex) + + return self._download_results( + rows_iter, + max_results=max_results, + progress_bar_type=progress_bar_type, + user_dtypes=dtypes, + ) + + def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs): from concurrent.futures import TimeoutError from google.auth.exceptions import RefreshError @@ -397,21 +414,12 @@ def run_query( if config is not None: job_config.update(config) - if "query" in config and "query" in config["query"]: - if query_or_table is not None: - raise ValueError( - "Query statement can't be specified " - "inside config while it is specified " - "as parameter" - ) - query_or_table = config["query"].pop("query") - self._start_timer() try: logger.debug("Requesting query... ") query_reply = self.client.query( - query_or_table, + query, job_config=bigquery.QueryJobConfig.from_api_repr(job_config), location=self.location, project=self.project_id, @@ -471,15 +479,25 @@ def run_query( ) dtypes = kwargs.get("dtypes") + + # Ensure destination is populated. + try: + query_reply.result() + except self.http_error as ex: + self.process_http_error(ex) + + # Get the table schema, so that we can list rows. 
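+        # Note: query_reply.result() above has already populated the
+        # destination table, so this lookup is expected to succeed for a
+        # completed job.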
+ destination = self.client.get_table(query_reply.destination) + rows_iter = self.client.list_rows(destination, max_results=max_results) return self._download_results( - query_reply, + rows_iter, max_results=max_results, progress_bar_type=progress_bar_type, user_dtypes=dtypes, ) def _download_results( - self, query_job, max_results=None, progress_bar_type=None, user_dtypes=None, + self, rows_iter, max_results=None, progress_bar_type=None, user_dtypes=None, ): # No results are desired, so don't bother downloading anything. if max_results == 0: @@ -511,14 +529,6 @@ def _download_results( to_dataframe_kwargs["create_bqstorage_client"] = create_bqstorage_client try: - # TODO: This is the only difference between table ID and query job. - # But should I refactor for - # https://github.com/googleapis/python-bigquery-pandas/issues/339 - # now? - query_job.result() - # Get the table schema, so that we can list rows. - destination = self.client.get_table(query_job.destination) - rows_iter = self.client.list_rows(destination, max_results=max_results) schema_fields = [field.to_api_repr() for field in rows_iter.schema] conversion_dtypes = _bqschema_to_nullsafe_dtypes(schema_fields) # ENDTODO: This is the only difference between table ID and @@ -829,6 +839,15 @@ def read_gbq( if dialect not in ("legacy", "standard"): raise ValueError("'{0}' is not valid for dialect".format(dialect)) + if configuration and "query" in configuration and "query" in configuration["query"]: + if query_or_table is not None: + raise ValueError( + "Query statement can't be specified " + "inside config while it is specified " + "as parameter" + ) + query_or_table = configuration["query"].pop("query") + connector = GbqConnector( project_id, reauth=reauth, @@ -840,13 +859,21 @@ def read_gbq( use_bqstorage_api=use_bqstorage_api, ) - final_df = connector.run_query( - query_or_table, - configuration=configuration, - max_results=max_results, - progress_bar_type=progress_bar_type, - dtypes=dtypes, - ) + if _is_query(query_or_table): + final_df = connector.run_query( + query_or_table, + configuration=configuration, + max_results=max_results, + progress_bar_type=progress_bar_type, + dtypes=dtypes, + ) + else: + final_df = connector.download_table( + query_or_table, + max_results=max_results, + progress_bar_type=progress_bar_type, + dtypes=dtypes, + ) # Reindex the DataFrame on the provided column if index_col is not None: diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 513df4b9..3f0c5e53 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -43,7 +43,7 @@ def mock_bigquery_client(monkeypatch): # Mock out SELECT 1 query results. def generate_schema(): - query = mock_client.query.call_args[0][0] + query = mock_client.query.call_args[0][0] if mock_client.query.call_args else "" if query == "SELECT 1 AS int_col": return [google.cloud.bigquery.SchemaField("int_col", "INTEGER")] else: @@ -51,4 +51,10 @@ def generate_schema(): type(mock_rows).schema = mock.PropertyMock(side_effect=generate_schema) + # Mock out get_table. 
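+    # Returning a real Table built from the requested reference lets tests
+    # assert on the parsed project, dataset, and table IDs rather than on
+    # raw strings.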
+ def get_table(table_ref_or_id, **kwargs): + return google.cloud.bigquery.Table(table_ref_or_id) + + mock_client.get_table.side_effect = get_table + return mock_client diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 496486ef..480f0000 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -521,13 +521,14 @@ def test_read_gbq_calls_tqdm(mock_bigquery_client, mock_service_account_credenti assert to_dataframe_kwargs["progress_bar_type"] == "foobar" -def test_read_gbq_bypasses_query_with_table_id( +def test_read_gbq_with_full_table_id( mock_bigquery_client, mock_service_account_credentials ): mock_service_account_credentials.project_id = "service_account_project_id" df = gbq.read_gbq( "my-project.my_dataset.read_gbq_table", credentials=mock_service_account_credentials, + project_id="param-project", ) assert df is not None @@ -535,3 +536,37 @@ def test_read_gbq_bypasses_query_with_table_id( mock_bigquery_client.list_rows.assert_called_with( "my-project.my_dataset.read_gbq_table" ) + + +def test_read_gbq_with_partial_table_id( + mock_bigquery_client, mock_service_account_credentials +): + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "my_dataset.read_gbq_table", + credentials=mock_service_account_credentials, + project_id="param-project", + ) + assert df is not None + + mock_bigquery_client.query.assert_not_called() + mock_bigquery_client.list_rows.assert_called_with( + "param-project.my_dataset.read_gbq_table" + ) + + +def test_read_gbq_bypasses_query_with_table_id_and_max_results( + mock_bigquery_client, mock_service_account_credentials +): + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "my-project.my_dataset.read_gbq_table", + credentials=mock_service_account_credentials, + max_results=11, + ) + assert df is not None + + mock_bigquery_client.query.assert_not_called() + mock_bigquery_client.list_rows.assert_called_with( + "my-project.my_dataset.read_gbq_table", max_results=11 + ) From dd51ad8e7d9c51301e33ec2422fe6d80013e7322 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 15:49:49 -0600 Subject: [PATCH 06/37] fix remaining tests, don't localalize out-of-bounds timestamp columns --- pandas_gbq/gbq.py | 5 ++++- pandas_gbq/timestamp.py | 8 +++++++- tests/unit/test_gbq.py | 7 ++++--- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 6a8b6788..247df17c 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -386,7 +386,10 @@ def download_table( try: # Get the table schema, so that we can list rows. - destination = self.client.get_table(table_id) + table_ref = bigquery.TableReference.from_string( + table_id, default_project=self.project_id + ) + destination = self.client.get_table(table_ref) rows_iter = self.client.list_rows(destination, max_results=max_results) except self.http_error as ex: self.process_http_error(ex) diff --git a/pandas_gbq/timestamp.py b/pandas_gbq/timestamp.py index e0b41475..c6bb6d93 100644 --- a/pandas_gbq/timestamp.py +++ b/pandas_gbq/timestamp.py @@ -7,6 +7,8 @@ Private module. """ +import pandas.api.types + def localize_df(df, schema_fields): """Localize any TIMESTAMP columns to tz-aware type. 
@@ -38,7 +40,11 @@ def localize_df(df, schema_fields): if "mode" in field and field["mode"].upper() == "REPEATED": continue - if field["type"].upper() == "TIMESTAMP" and df[column].dt.tz is None: + if ( + field["type"].upper() == "TIMESTAMP" + and pandas.api.types.is_datetime64_ns_dtype(df.dtypes[column]) + and df[column].dt.tz is None + ): df[column] = df[column].dt.tz_localize("UTC") return df diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 480f0000..7593eea5 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -8,6 +8,7 @@ import datetime from unittest import mock +from google.cloud import bigquery import numpy import pandas from pandas import DataFrame @@ -534,7 +535,7 @@ def test_read_gbq_with_full_table_id( mock_bigquery_client.query.assert_not_called() mock_bigquery_client.list_rows.assert_called_with( - "my-project.my_dataset.read_gbq_table" + bigquery.Table("my-project.my_dataset.read_gbq_table"), max_results=None, ) @@ -551,7 +552,7 @@ def test_read_gbq_with_partial_table_id( mock_bigquery_client.query.assert_not_called() mock_bigquery_client.list_rows.assert_called_with( - "param-project.my_dataset.read_gbq_table" + bigquery.Table("param-project.my_dataset.read_gbq_table"), max_results=None, ) @@ -568,5 +569,5 @@ def test_read_gbq_bypasses_query_with_table_id_and_max_results( mock_bigquery_client.query.assert_not_called() mock_bigquery_client.list_rows.assert_called_with( - "my-project.my_dataset.read_gbq_table", max_results=11 + bigquery.Table("my-project.my_dataset.read_gbq_table"), max_results=11 ) From e1ad679671f920b5964b7b987d9ceb3b36dca10e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 15:52:12 -0600 Subject: [PATCH 07/37] Update pandas_gbq/gbq.py --- pandas_gbq/gbq.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 247df17c..fdd4dcc6 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -534,8 +534,6 @@ def _download_results( try: schema_fields = [field.to_api_repr() for field in rows_iter.schema] conversion_dtypes = _bqschema_to_nullsafe_dtypes(schema_fields) - # ENDTODO: This is the only difference between table ID and - conversion_dtypes.update(user_dtypes) df = rows_iter.to_dataframe( dtypes=conversion_dtypes, From d29bc2ac072f0c1673944557b0dc53c12487a99a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 16:30:35 -0600 Subject: [PATCH 08/37] fix 3.7 unit tests --- noxfile.py | 2 +- tests/unit/test_gbq.py | 24 ++++++++++++++---------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/noxfile.py b/noxfile.py index df3378bf..7530c68a 100644 --- a/noxfile.py +++ b/noxfile.py @@ -259,7 +259,7 @@ def cover(session): test runs (not system test runs), and then erases coverage data. 
""" session.install("coverage", "pytest-cov") - session.run("coverage", "report", "--show-missing", "--fail-under=88") + session.run("coverage", "report", "--show-missing", "--fail-under=91") session.run("coverage", "erase") diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 7593eea5..142771d1 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -8,7 +8,6 @@ import datetime from unittest import mock -from google.cloud import bigquery import numpy import pandas from pandas import DataFrame @@ -534,9 +533,10 @@ def test_read_gbq_with_full_table_id( assert df is not None mock_bigquery_client.query.assert_not_called() - mock_bigquery_client.list_rows.assert_called_with( - bigquery.Table("my-project.my_dataset.read_gbq_table"), max_results=None, - ) + sent_table = mock_bigquery_client.list_rows.call_args[0][0] + assert sent_table.project == "my-project" + assert sent_table.dataset_id == "my_dataset" + assert sent_table.table_id == "read_gbq_table" def test_read_gbq_with_partial_table_id( @@ -551,9 +551,10 @@ def test_read_gbq_with_partial_table_id( assert df is not None mock_bigquery_client.query.assert_not_called() - mock_bigquery_client.list_rows.assert_called_with( - bigquery.Table("param-project.my_dataset.read_gbq_table"), max_results=None, - ) + sent_table = mock_bigquery_client.list_rows.call_args[0][0] + assert sent_table.project == "param-project" + assert sent_table.dataset_id == "my_dataset" + assert sent_table.table_id == "read_gbq_table" def test_read_gbq_bypasses_query_with_table_id_and_max_results( @@ -568,6 +569,9 @@ def test_read_gbq_bypasses_query_with_table_id_and_max_results( assert df is not None mock_bigquery_client.query.assert_not_called() - mock_bigquery_client.list_rows.assert_called_with( - bigquery.Table("my-project.my_dataset.read_gbq_table"), max_results=11 - ) + sent_table = mock_bigquery_client.list_rows.call_args[0][0] + assert sent_table.project == "my-project" + assert sent_table.dataset_id == "my_dataset" + assert sent_table.table_id == "read_gbq_table" + sent_max_results = mock_bigquery_client.list_rows.call_args[1]["max_results"] + assert sent_max_results == 11 From cb8f24f5153535fdff344f2b3837b10222b4e322 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 16:32:56 -0600 Subject: [PATCH 09/37] correct coverage --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 7530c68a..398b4dc2 100644 --- a/noxfile.py +++ b/noxfile.py @@ -259,7 +259,7 @@ def cover(session): test runs (not system test runs), and then erases coverage data. 
""" session.install("coverage", "pytest-cov") - session.run("coverage", "report", "--show-missing", "--fail-under=91") + session.run("coverage", "report", "--show-missing", "--fail-under=89") session.run("coverage", "erase") From 56b73b213444955b28041f1822c6ceccee93916c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 16:34:30 -0600 Subject: [PATCH 10/37] skip coverage for optional test skip --- tests/unit/test_gbq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 142771d1..0c27dd76 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -487,7 +487,7 @@ def test_read_gbq_passes_dtypes(mock_bigquery_client, mock_service_account_crede def test_read_gbq_use_bqstorage_api( mock_bigquery_client, mock_service_account_credentials ): - if not FEATURES.bigquery_has_bqstorage: + if not FEATURES.bigquery_has_bqstorage: # pragma: NO COVER pytest.skip("requires BigQuery Storage API") mock_service_account_credentials.project_id = "service_account_project_id" From 8a61e97e31d5fd5a29898554f52cb66c422f12e9 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 16:47:34 -0600 Subject: [PATCH 11/37] fix docs build --- pandas_gbq/gbq.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index fdd4dcc6..41cb2f5b 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -712,14 +712,14 @@ def read_gbq( reauth : boolean, default False Force Google BigQuery to re-authenticate the user. This is useful if multiple accounts are used. - auth_local_webserver : boolean, default False - Use the `local webserver flow`_ instead of the `console flow`_ - when getting user credentials. - - .. _local webserver flow: - https://googleapis.dev/python/google-auth-oauthlib/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server - .. _console flow: - https://googleapis.dev/python/google-auth-oauthlib/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + auth_local_webserver : bool, default False + Use the `local webserver flow + `_ + instead of the `console flow + `_ + when getting user credentials. Your code must run on the same machine + as your web browser and your web browser can access your application + via ``localhost:808X``. .. versionadded:: 0.2.0 dialect : str, default 'standard' @@ -954,13 +954,13 @@ def to_gbq( ``'append'`` If table exists, insert data. Create if does not exist. auth_local_webserver : bool, default False - Use the `local webserver flow`_ instead of the `console flow`_ - when getting user credentials. - - .. _local webserver flow: - http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server - .. _console flow: - http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + Use the `local webserver flow + `_ + instead of the `console flow + `_ + when getting user credentials. Your code must run on the same machine + as your web browser and your web browser can access your application + via ``localhost:808X``. .. 
versionadded:: 0.2.0 table_schema : list of dicts, optional From 3f7900bf184a10337c9bab19fa703211650da1df Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 16:56:20 -0600 Subject: [PATCH 12/37] improve test coverage for error case --- tests/unit/test_gbq.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 0c27dd76..9a0e8ce3 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -8,6 +8,7 @@ import datetime from unittest import mock +import google.api_core.exceptions import numpy import pandas from pandas import DataFrame @@ -575,3 +576,17 @@ def test_read_gbq_bypasses_query_with_table_id_and_max_results( assert sent_table.table_id == "read_gbq_table" sent_max_results = mock_bigquery_client.list_rows.call_args[1]["max_results"] assert sent_max_results == 11 + + +def test_read_gbq_with_list_rows_error_translates_exception( + mock_bigquery_client, mock_service_account_credentials +): + mock_bigquery_client.list_rows.side_effect = ( + google.api_core.exceptions.NotFound("table not found"), + ) + + with pytest.raises(gbq.GenericGBQException, match="table not found"): + gbq.read_gbq( + "my-project.my_dataset.read_gbq_table", + credentials=mock_service_account_credentials, + ) From 3c53f1f697265a9034b00fadfe99f525100f8eae Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 13 Dec 2021 10:21:46 -0600 Subject: [PATCH 13/37] as of google-cloud-bigquery 1.11.0, get_table before list_rows is unnecessary --- pandas_gbq/gbq.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 41cb2f5b..1ba64057 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -385,12 +385,10 @@ def download_table( self._start_timer() try: - # Get the table schema, so that we can list rows. table_ref = bigquery.TableReference.from_string( table_id, default_project=self.project_id ) - destination = self.client.get_table(table_ref) - rows_iter = self.client.list_rows(destination, max_results=max_results) + rows_iter = self.client.list_rows(table_ref, max_results=max_results) except self.http_error as ex: self.process_http_error(ex) @@ -489,9 +487,9 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs): except self.http_error as ex: self.process_http_error(ex) - # Get the table schema, so that we can list rows. 
- destination = self.client.get_table(query_reply.destination) - rows_iter = self.client.list_rows(destination, max_results=max_results) + rows_iter = self.client.list_rows( + query_reply.destination, max_results=max_results + ) return self._download_results( rows_iter, max_results=max_results, From 5ce125f13934b27a16889ae22aabf65d77837178 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 20 Dec 2021 13:42:21 -0600 Subject: [PATCH 14/37] tests with whitespace --- tests/unit/test_gbq.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 9a0e8ce3..df9241bc 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -87,8 +87,14 @@ def test__bqschema_to_nullsafe_dtypes(type_, expected): ["query_or_table", "expected"], [ ("SELECT 1", True), + ("SELECT\n1", True), + ("SELECT\t1", True), ("dataset.table", False), + (" dataset.table ", False), + ("\r\ndataset.table\r\n", False), ("project-id.dataset.table", False), + (" project-id.dataset.table ", False), + ("\r\nproject-id.dataset.table\r\n", False), ], ) def test__is_query(query_or_table, expected): From ea660f41e7717a20038119a17890795c3cfedb38 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 20 Dec 2021 13:49:25 -0600 Subject: [PATCH 15/37] type annotations --- pandas_gbq/gbq.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 1ba64057..cc6bef1f 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -7,9 +7,17 @@ import time import warnings from datetime import datetime +import typing +from typing import Dict, Optional, Union import numpy as np +# Only import at module-level at type checking time to avoid circular +# dependencies in the pandas package, which has an optional dependency on +# pandas-gbq. +if typing.TYPE_CHECKING: + import pandas + # Required dependencies, but treat as optional so that _test_google_api_imports # can provide a better error message. try: @@ -380,8 +388,14 @@ def process_http_error(ex): raise GenericGBQException("Reason: {0}".format(ex)) def download_table( - self, table_id, max_results=None, progress_bar_type=None, dtypes=None - ): + self, + table_id: str, + max_results: Optional[int] = None, + progress_bar_type: Optional[str] = None, + dtypes: Dict[ + str, Union[str, "pandas.api.extensions.ExtensionDtype", np.dtype] + ] = None, + ) -> "pandas.DataFrame": self._start_timer() try: From 3d93c7880056843fa4021efc5e69a931888bde09 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 20 Dec 2021 17:23:44 -0600 Subject: [PATCH 16/37] test: improve unit test coverage --- pandas_gbq/schema.py | 14 +++++++++++++- tests/unit/test_features.py | 19 ++++++++++++++++++ tests/unit/test_schema.py | 37 ++++++++++++++++++++++++++++++++++++ tests/unit/test_timestamp.py | 9 +++++++++ 4 files changed, 78 insertions(+), 1 deletion(-) diff --git a/pandas_gbq/schema.py b/pandas_gbq/schema.py index e2f97455..cfa1c765 100644 --- a/pandas_gbq/schema.py +++ b/pandas_gbq/schema.py @@ -21,7 +21,19 @@ def to_pandas_gbq(client_schema): """Given a sequence of :class:`google.cloud.bigquery.schema.SchemaField`, return a schema in pandas-gbq API format. """ - remote_fields = [field_remote.to_api_repr() for field_remote in client_schema] + remote_fields = [ + # Filter out default values. google-cloud-bigquery versions before + # 2.31.0 (https://github.com/googleapis/python-bigquery/pull/557) + # include a description key, even if not explicitly set. 
+        # This has the
+        # potential to unset the description unintentionally in cases where
+        # pandas-gbq is updating the schema.
+        {
+            key: value
+            for key, value in field_remote.to_api_repr().items()
+            if value is not None
+        }
+        for field_remote in client_schema
+    ]
     for field in remote_fields:
         field["type"] = field["type"].upper()
         field["mode"] = field["mode"].upper()
diff --git a/tests/unit/test_features.py b/tests/unit/test_features.py
index b10b0fa8..d62480f3 100644
--- a/tests/unit/test_features.py
+++ b/tests/unit/test_features.py
@@ -10,6 +10,7 @@
 @pytest.fixture(autouse=True)
 def fresh_bigquery_version(monkeypatch):
     monkeypatch.setattr(FEATURES, "_bigquery_installed_version", None)
+    monkeypatch.setattr(FEATURES, "_pandas_installed_version", None)


 @pytest.mark.parametrize(
@@ -28,3 +29,21 @@ def test_bigquery_has_from_dataframe_with_csv(monkeypatch, bigquery_version, exp

     monkeypatch.setattr(google.cloud.bigquery, "__version__", bigquery_version)
     assert FEATURES.bigquery_has_from_dataframe_with_csv == expected
+
+
+@pytest.mark.parametrize(
+    ["pandas_version", "expected"],
+    [
+        ("0.14.7", False),
+        ("0.22.1", False),
+        ("0.23.0", True),
+        ("0.23.1", True),
+        ("1.0.0", True),
+        ("2.1.3", True),
+    ],
+)
+def test_pandas_has_deprecated_verbose(monkeypatch, pandas_version, expected):
+    import pandas
+
+    monkeypatch.setattr(pandas, "__version__", pandas_version)
+    assert FEATURES.pandas_has_deprecated_verbose == expected
diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py
index 743ddc26..d31ac2e9 100644
--- a/tests/unit/test_schema.py
+++ b/tests/unit/test_schema.py
@@ -3,7 +3,9 @@
 # license that can be found in the LICENSE file.

 import datetime
+from typing import Any, Dict, List

+import google.cloud.bigquery
 import pandas
 import pytest

@@ -151,3 +153,38 @@ def test_generate_bq_schema(module_under_test, dataframe, expected_schema):
 def test_update_schema(module_under_test, schema_old, schema_new, expected_output):
     output = module_under_test.update_schema(schema_old, schema_new)
     assert output == expected_output
+
+
+@pytest.mark.parametrize(
+    ["bq_schema", "expected"],
+    [
+        ([], {"fields": []}),
+        (
+            [google.cloud.bigquery.SchemaField("test_col", "STRING")],
+            {"fields": [{"name": "test_col", "type": "STRING", "mode": "NULLABLE"}]},
+        ),
+        (
+            [google.cloud.bigquery.SchemaField("test_col", "STRING", mode="REQUIRED")],
+            {"fields": [{"name": "test_col", "type": "STRING", "mode": "REQUIRED"}]},
+        ),
+        (
+            [
+                google.cloud.bigquery.SchemaField("test1", "STRING"),
+                google.cloud.bigquery.SchemaField("test2", "INTEGER"),
+            ],
+            {
+                "fields": [
+                    {"name": "test1", "type": "STRING", "mode": "NULLABLE"},
+                    {"name": "test2", "type": "INTEGER", "mode": "NULLABLE"},
+                ]
+            },
+        ),
+    ],
+)
+def test_to_pandas_gbq(
+    bq_schema: List[google.cloud.bigquery.SchemaField], expected: Dict[str, Any]
+):
+    import pandas_gbq.schema
+
+    result = pandas_gbq.schema.to_pandas_gbq(bq_schema)
+    assert result == expected
diff --git a/tests/unit/test_timestamp.py b/tests/unit/test_timestamp.py
index 406643d0..b35c1307 100644
--- a/tests/unit/test_timestamp.py
+++ b/tests/unit/test_timestamp.py
@@ -56,6 +56,14 @@ def test_localize_df_with_timestamp_column(module_under_test):
             dtype="datetime64[ns]",
         ),
         "float_col": [0.1, 0.2, 0.3],
+        "repeated_col": pandas.Series(
+            [
+                ["2011-01-01 01:02:03"],
+                ["2012-02-02 04:05:06"],
+                ["2013-03-03 07:08:09"],
+            ],
+            dtype="object",
+        ),
     }
     )
     expected = df.copy()

     bq_schema = [
         {"name": "integer_col",
"type": "INTEGER"}, {"name": "timestamp_col", "type": "TIMESTAMP"}, {"name": "float_col", "type": "FLOAT"}, + {"name": "repeated_col", "type": "TIMESTAMP", "mode": "REPEATED"}, ] localized = module_under_test.localize_df(df, bq_schema) From 93e872e2bfb2f536f55b0d112622be0818a3cc7b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 20 Dec 2021 17:25:13 -0600 Subject: [PATCH 17/37] boost coverage --- noxfile.py | 2 +- owlbot.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index 398b4dc2..43825e5e 100644 --- a/noxfile.py +++ b/noxfile.py @@ -259,7 +259,7 @@ def cover(session): test runs (not system test runs), and then erases coverage data. """ session.install("coverage", "pytest-cov") - session.run("coverage", "report", "--show-missing", "--fail-under=89") + session.run("coverage", "report", "--show-missing", "--fail-under=90") session.run("coverage", "erase") diff --git a/owlbot.py b/owlbot.py index 5ef93de7..3c31a67f 100644 --- a/owlbot.py +++ b/owlbot.py @@ -33,7 +33,7 @@ templated_files = common.py_library( unit_test_python_versions=["3.7", "3.8", "3.9", "3.10"], system_test_python_versions=["3.7", "3.8", "3.9", "3.10"], - cov_level=88, + cov_level=90, unit_test_extras=extras, system_test_extras=extras, intersphinx_dependencies={ From e0ae455974302245cef9cbe65a04fb3df324b331 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Mon, 20 Dec 2021 23:27:31 +0000 Subject: [PATCH 18/37] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 398b4dc2..df3378bf 100644 --- a/noxfile.py +++ b/noxfile.py @@ -259,7 +259,7 @@ def cover(session): test runs (not system test runs), and then erases coverage data. """ session.install("coverage", "pytest-cov") - session.run("coverage", "report", "--show-missing", "--fail-under=89") + session.run("coverage", "report", "--show-missing", "--fail-under=88") session.run("coverage", "erase") From 28b72f0102e20eb50e45d45a413a7ca232eddf9c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 21 Dec 2021 13:36:42 -0600 Subject: [PATCH 19/37] boost coverage --- pandas_gbq/load.py | 7 +++++- pandas_gbq/schema.py | 2 ++ tests/unit/test_load.py | 56 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 1 deletion(-) diff --git a/pandas_gbq/load.py b/pandas_gbq/load.py index 315ad5cd..588a6719 100644 --- a/pandas_gbq/load.py +++ b/pandas_gbq/load.py @@ -185,6 +185,11 @@ def load_csv_from_file( chunksize: Optional[int], schema: Optional[Dict[str, Any]], ): + """Manually encode a DataFrame to CSV and use the buffer in a load job. + + This method is needed for writing with google-cloud-bigquery versions that + don't implment load_table_from_dataframe with the CSV serialization format. 
+ """ if schema is None: schema = pandas_gbq.schema.generate_bq_schema(dataframe) @@ -203,7 +208,7 @@ def load_chunk(chunk, job_config): finally: chunk_buffer.close() - return load_csv(dataframe, chunksize, bq_schema, load_chunk,) + return load_csv(dataframe, chunksize, bq_schema, load_chunk) def load_chunks( diff --git a/pandas_gbq/schema.py b/pandas_gbq/schema.py index cfa1c765..5877ddfb 100644 --- a/pandas_gbq/schema.py +++ b/pandas_gbq/schema.py @@ -113,6 +113,8 @@ def generate_bq_schema(dataframe, default_type="STRING"): "S": "STRING", "U": "STRING", "M": "TIMESTAMP", + # TODO: Disambiguate TIMESTAMP from DATETIME based on if column is + # localized. } fields = [] diff --git a/tests/unit/test_load.py b/tests/unit/test_load.py index 8e18cfb9..3f32bff9 100644 --- a/tests/unit/test_load.py +++ b/tests/unit/test_load.py @@ -95,6 +95,62 @@ def test_encode_chunks_with_chunksize_none(): assert len(chunk.index) == 6 +def test_load_csv_from_file_generates_schema(mock_bigquery_client): + import google.cloud.bigquery + + df = pandas.DataFrame( + { + "int_col": [1, 2, 3], + "bool_col": [True, False, True], + "float_col": [0.0, 1.25, -2.75], + "string_col": ["a", "b", "c"], + "datetime_col": pandas.Series( + [ + "2021-12-21 13:28:40.123789", + "2000-01-01 11:10:09", + "2040-10-31 23:59:59.999999", + ], + dtype="datetime64[ns]", + ), + "timestamp_col": pandas.Series( + [ + "2021-12-21 13:28:40.123789", + "2000-01-01 11:10:09", + "2040-10-31 23:59:59.999999", + ], + dtype="datetime64[ns]", + ).dt.tz_localize(datetime.timezone.utc), + } + ) + destination = google.cloud.bigquery.TableReference.from_string( + "my-project.my_dataset.my_table" + ) + + _ = list( + load.load_csv_from_file(mock_bigquery_client, df, destination, None, None, None) + ) + + mock_load = mock_bigquery_client.load_table_from_file + assert mock_load.called + _, kwargs = mock_load.call_args + assert "job_config" in kwargs + sent_schema = kwargs["job_config"].schema + assert sent_schema[0].name == "int_col" + assert sent_schema[0].field_type == "INTEGER" + assert sent_schema[1].name == "bool_col" + assert sent_schema[1].field_type == "BOOLEAN" + assert sent_schema[2].name == "float_col" + assert sent_schema[2].field_type == "FLOAT" + assert sent_schema[3].name == "string_col" + assert sent_schema[3].field_type == "STRING" + # TODO: Disambiguate TIMESTAMP from DATETIME based on if column is + # localized. 
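+    # Until that TODO is resolved, both the naive datetime column and the
+    # tz-aware timestamp column below are expected to map to TIMESTAMP.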
+ assert sent_schema[4].name == "datetime_col" + assert sent_schema[4].field_type == "TIMESTAMP" + assert sent_schema[5].name == "timestamp_col" + assert sent_schema[5].field_type == "TIMESTAMP" + + @pytest.mark.parametrize( ["bigquery_has_from_dataframe_with_csv", "api_method"], [(True, "load_parquet"), (True, "load_csv"), (False, "load_csv")], From 1d6831f8a2e59f5fdaf157953a0d8ec56b187c0d Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Tue, 21 Dec 2021 19:41:25 +0000 Subject: [PATCH 20/37] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- .coveragerc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.coveragerc b/.coveragerc index ba50bf32..935f2a53 100644 --- a/.coveragerc +++ b/.coveragerc @@ -22,7 +22,7 @@ omit = google/cloud/__init__.py [report] -fail_under = 88 +fail_under = 90 show_missing = True exclude_lines = # Re-enable the standard pragma From fcf8276cdfc7d9b3c7cc8c7740f2134fff195c07 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 21 Dec 2021 15:54:51 -0600 Subject: [PATCH 21/37] finish coverage for load.py --- tests/unit/test_load.py | 55 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/tests/unit/test_load.py b/tests/unit/test_load.py index 3f32bff9..9f1b685b 100644 --- a/tests/unit/test_load.py +++ b/tests/unit/test_load.py @@ -16,6 +16,7 @@ import pandas.testing import pytest +from pandas_gbq import exceptions from pandas_gbq.features import FEATURES from pandas_gbq import load @@ -95,6 +96,27 @@ def test_encode_chunks_with_chunksize_none(): assert len(chunk.index) == 6 +def test_load_csv_from_dataframe_allows_client_to_generate_schema(mock_bigquery_client): + import google.cloud.bigquery + + df = pandas.DataFrame({"int_col": [1, 2, 3]}) + destination = google.cloud.bigquery.TableReference.from_string( + "my-project.my_dataset.my_table" + ) + + _ = list( + load.load_csv_from_dataframe( + mock_bigquery_client, df, destination, None, None, None + ) + ) + + mock_load = mock_bigquery_client.load_table_from_dataframe + assert mock_load.called + _, kwargs = mock_load.call_args + assert "job_config" in kwargs + assert kwargs["job_config"].schema is None + + def test_load_csv_from_file_generates_schema(mock_bigquery_client): import google.cloud.bigquery @@ -199,6 +221,39 @@ def test_load_chunks_with_invalid_api_method(): load.load_chunks(None, None, None, api_method="not_a_thing") +def test_load_parquet_allows_client_to_generate_schema(mock_bigquery_client): + import google.cloud.bigquery + + df = pandas.DataFrame({"int_col": [1, 2, 3]}) + destination = google.cloud.bigquery.TableReference.from_string( + "my-project.my_dataset.my_table" + ) + + load.load_parquet(mock_bigquery_client, df, destination, None, None) + + mock_load = mock_bigquery_client.load_table_from_dataframe + assert mock_load.called + _, kwargs = mock_load.call_args + assert "job_config" in kwargs + assert kwargs["job_config"].schema is None + + +def test_load_parquet_with_bad_conversion(mock_bigquery_client): + import google.cloud.bigquery + import pyarrow + + mock_bigquery_client.load_table_from_dataframe.side_effect = ( + pyarrow.lib.ArrowInvalid() + ) + df = pandas.DataFrame({"int_col": [1, 2, 3]}) + destination = google.cloud.bigquery.TableReference.from_string( + "my-project.my_dataset.my_table" + ) + + with pytest.raises(exceptions.ConversionError): + 
load.load_parquet(mock_bigquery_client, df, destination, None, None) + + @pytest.mark.parametrize( ("numeric_type",), ( From 76b38a389f880c0c5b87c4b5a10f6fb7c6749d97 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 21 Dec 2021 16:04:58 -0600 Subject: [PATCH 22/37] another test --- tests/unit/test_gbq.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index df9241bc..edf0f37f 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -290,7 +290,7 @@ def test_to_gbq_w_project_table(mock_bigquery_client): assert table.project == "project_table" -def test_to_gbq_creates_dataset(mock_bigquery_client): +def test_to_gbq_create_dataset(mock_bigquery_client): import google.api_core.exceptions mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( @@ -303,6 +303,23 @@ def test_to_gbq_creates_dataset(mock_bigquery_client): mock_bigquery_client.create_dataset.assert_called_with(mock.ANY) +def test_to_gbq_create_dataset_translates_exception(mock_bigquery_client): + import google.api_core.exceptions + + mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( + "my_table" + ) + mock_bigquery_client.get_dataset.side_effect = google.api_core.exceptions.NotFound( + "my_dataset" + ) + mock_bigquery_client.create_dataset.side_effect = google.api_core.exceptions.InternalServerError( + "something went wrong" + ) + + with pytest.raises(gbq.GenericGBQException): + gbq.to_gbq(DataFrame([[1]]), "my_dataset.my_table", project_id="1234") + + def test_read_gbq_with_no_project_id_given_should_fail(monkeypatch): import pydata_google_auth From 2fd1e326d7401de8f32159e9f794e5ff396f58f9 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 21 Dec 2021 17:05:35 -0600 Subject: [PATCH 23/37] refactor gbq tests --- pandas_gbq/gbq.py | 4 +- tests/unit/conftest.py | 30 +++ tests/unit/test_auth.py | 10 +- tests/unit/test_gbq.py | 511 ------------------------------------ tests/unit/test_read_gbq.py | 260 ++++++++++++++++++ tests/unit/test_to_gbq.py | 274 +++++++++++++++++++ 6 files changed, 568 insertions(+), 521 deletions(-) create mode 100644 tests/unit/test_read_gbq.py create mode 100644 tests/unit/test_to_gbq.py diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index cc6bef1f..b557ccc1 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -1355,9 +1355,7 @@ def create(self, dataset_id): from google.cloud.bigquery import Dataset if self.exists(dataset_id): - raise DatasetCreationError( - "Dataset {0} already " "exists".format(dataset_id) - ) + raise DatasetCreationError("Dataset {0} already exists".format(dataset_id)) dataset = Dataset(self._dataset_ref(dataset_id)) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 3f0c5e53..650f851c 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -9,6 +9,36 @@ import pytest +def mock_get_credentials(*args, **kwargs): + import google.auth.credentials + + mock_credentials = mock.create_autospec(google.auth.credentials.Credentials) + return mock_credentials, "default-project" + + +@pytest.fixture +def mock_service_account_credentials(): + import google.oauth2.service_account + + mock_credentials = mock.create_autospec(google.oauth2.service_account.Credentials) + return mock_credentials + + +@pytest.fixture +def mock_compute_engine_credentials(): + import google.auth.compute_engine + + mock_credentials = mock.create_autospec(google.auth.compute_engine.Credentials) + return mock_credentials + + 
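+# The autouse fixture below patches pydata_google_auth.default so that no
+# unit test can trigger a real OAuth flow; the mock returns fake
+# credentials plus a "default-project" project ID.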
+@pytest.fixture(autouse=True) +def no_auth(monkeypatch): + import pydata_google_auth + + monkeypatch.setattr(pydata_google_auth, "default", mock_get_credentials) + + @pytest.fixture(autouse=True, scope="function") def reset_context(): import pandas_gbq diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py index c101942e..2fa9b828 100644 --- a/tests/unit/test_auth.py +++ b/tests/unit/test_auth.py @@ -47,16 +47,12 @@ def test_get_credentials_load_user_no_default(monkeypatch): import google.auth.credentials import pydata_google_auth.cache - def mock_default_credentials(scopes=None, request=None): - return (None, None) - - monkeypatch.setattr(google.auth, "default", mock_default_credentials) mock_user_credentials = mock.create_autospec(google.auth.credentials.Credentials) - mock_cache = mock.create_autospec(pydata_google_auth.cache.CredentialsCache) - mock_cache.load.return_value = mock_user_credentials + def mock_default_credentials(scopes, **kwargs): + return (mock_user_credentials, None) - monkeypatch.setattr(auth, "get_credentials_cache", lambda _: mock_cache) + monkeypatch.setattr(pydata_google_auth, "default", mock_default_credentials) credentials, project = auth.get_credentials() assert project is None diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index edf0f37f..d41d2ad0 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -4,11 +4,8 @@ # -*- coding: utf-8 -*- -import copy -import datetime from unittest import mock -import google.api_core.exceptions import numpy import pandas from pandas import DataFrame @@ -32,36 +29,6 @@ def mock_get_credentials_no_project(*args, **kwargs): return mock_credentials, None -def mock_get_credentials(*args, **kwargs): - import google.auth.credentials - - mock_credentials = mock.create_autospec(google.auth.credentials.Credentials) - return mock_credentials, "default-project" - - -@pytest.fixture -def mock_service_account_credentials(): - import google.oauth2.service_account - - mock_credentials = mock.create_autospec(google.oauth2.service_account.Credentials) - return mock_credentials - - -@pytest.fixture -def mock_compute_engine_credentials(): - import google.auth.compute_engine - - mock_credentials = mock.create_autospec(google.auth.compute_engine.Credentials) - return mock_credentials - - -@pytest.fixture(autouse=True) -def no_auth(monkeypatch): - import pydata_google_auth - - monkeypatch.setattr(pydata_google_auth, "default", mock_get_credentials) - - @pytest.mark.parametrize( ("type_", "expected"), [ @@ -130,486 +97,8 @@ def test_GbqConnector_get_client_w_new_bq(mock_bigquery_client): assert kwargs["client_info"].user_agent == "pandas-{}".format(pandas.__version__) -def test_to_gbq_should_fail_if_invalid_table_name_passed(): - with pytest.raises(gbq.NotFoundException): - gbq.to_gbq(DataFrame([[1]]), "invalid_table_name", project_id="1234") - - -def test_to_gbq_with_no_project_id_given_should_fail(monkeypatch): - import pydata_google_auth - - monkeypatch.setattr(pydata_google_auth, "default", mock_get_credentials_no_project) - - with pytest.raises(ValueError, match="Could not determine project ID"): - gbq.to_gbq(DataFrame([[1]]), "dataset.tablename") - - -@pytest.mark.parametrize( - ["api_method", "warning_message", "warning_type"], - [ - ("load_parquet", "chunksize is ignored", DeprecationWarning), - ("load_csv", "chunksize will be ignored", PendingDeprecationWarning), - ], -) -def test_to_gbq_with_chunksize_warns_deprecation( - api_method, warning_message, warning_type -): - with pytest.warns(warning_type, 
match=warning_message): - try: - gbq.to_gbq( - DataFrame([[1]]), - "dataset.tablename", - project_id="my-project", - api_method=api_method, - chunksize=100, - ) - except gbq.TableCreationError: - pass - - -@pytest.mark.parametrize(["verbose"], [(True,), (False,)]) -def test_to_gbq_with_verbose_new_pandas_warns_deprecation(monkeypatch, verbose): - monkeypatch.setattr( - type(FEATURES), - "pandas_has_deprecated_verbose", - mock.PropertyMock(return_value=True), - ) - with pytest.warns(FutureWarning, match="verbose is deprecated"): - try: - gbq.to_gbq( - DataFrame([[1]]), - "dataset.tablename", - project_id="my-project", - verbose=verbose, - ) - except gbq.TableCreationError: - pass - - -def test_to_gbq_wo_verbose_w_new_pandas_no_warnings(monkeypatch, recwarn): - monkeypatch.setattr( - type(FEATURES), - "pandas_has_deprecated_verbose", - mock.PropertyMock(return_value=True), - ) - try: - gbq.to_gbq(DataFrame([[1]]), "dataset.tablename", project_id="my-project") - except gbq.TableCreationError: - pass - assert len(recwarn) == 0 - - -def test_to_gbq_with_verbose_old_pandas_no_warnings(monkeypatch, recwarn): - monkeypatch.setattr( - type(FEATURES), - "pandas_has_deprecated_verbose", - mock.PropertyMock(return_value=False), - ) - try: - gbq.to_gbq( - DataFrame([[1]]), - "dataset.tablename", - project_id="my-project", - verbose=True, - ) - except gbq.TableCreationError: - pass - assert len(recwarn) == 0 - - -def test_to_gbq_with_private_key_raises_notimplementederror(): - with pytest.raises(NotImplementedError, match="private_key"): - gbq.to_gbq( - DataFrame([[1]]), - "dataset.tablename", - project_id="my-project", - private_key="path/to/key.json", - ) - - -def test_to_gbq_doesnt_run_query(mock_bigquery_client): - try: - gbq.to_gbq(DataFrame([[1]]), "dataset.tablename", project_id="my-project") - except gbq.TableCreationError: - pass - - mock_bigquery_client.query.assert_not_called() - - -def test_to_gbq_w_empty_df(mock_bigquery_client): - import google.api_core.exceptions - - mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( - "my_table" - ) - gbq.to_gbq(DataFrame(), "my_dataset.my_table", project_id="1234") - mock_bigquery_client.create_table.assert_called_with(mock.ANY) - mock_bigquery_client.load_table_from_dataframe.assert_not_called() - mock_bigquery_client.load_table_from_file.assert_not_called() - - -def test_to_gbq_w_default_project(mock_bigquery_client): - """If no project is specified, we should be able to use project from - default credentials. - """ - import google.api_core.exceptions - from google.cloud.bigquery.table import TableReference - - mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( - "my_table" - ) - gbq.to_gbq(DataFrame(), "my_dataset.my_table") - - mock_bigquery_client.get_table.assert_called_with( - TableReference.from_string("default-project.my_dataset.my_table") - ) - mock_bigquery_client.create_table.assert_called_with(mock.ANY) - table = mock_bigquery_client.create_table.call_args[0][0] - assert table.project == "default-project" - - -def test_to_gbq_w_project_table(mock_bigquery_client): - """If a project is included in the table ID, use that instead of the client - project. 
See: https://github.com/pydata/pandas-gbq/issues/321 - """ - import google.api_core.exceptions - from google.cloud.bigquery.table import TableReference - - mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( - "my_table" - ) - gbq.to_gbq( - DataFrame(), "project_table.my_dataset.my_table", project_id="project_client", - ) - - mock_bigquery_client.get_table.assert_called_with( - TableReference.from_string("project_table.my_dataset.my_table") - ) - mock_bigquery_client.create_table.assert_called_with(mock.ANY) - table = mock_bigquery_client.create_table.call_args[0][0] - assert table.project == "project_table" - - -def test_to_gbq_create_dataset(mock_bigquery_client): - import google.api_core.exceptions - - mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( - "my_table" - ) - mock_bigquery_client.get_dataset.side_effect = google.api_core.exceptions.NotFound( - "my_dataset" - ) - gbq.to_gbq(DataFrame([[1]]), "my_dataset.my_table", project_id="1234") - mock_bigquery_client.create_dataset.assert_called_with(mock.ANY) - - -def test_to_gbq_create_dataset_translates_exception(mock_bigquery_client): - import google.api_core.exceptions - - mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( - "my_table" - ) - mock_bigquery_client.get_dataset.side_effect = google.api_core.exceptions.NotFound( - "my_dataset" - ) - mock_bigquery_client.create_dataset.side_effect = google.api_core.exceptions.InternalServerError( - "something went wrong" - ) - - with pytest.raises(gbq.GenericGBQException): - gbq.to_gbq(DataFrame([[1]]), "my_dataset.my_table", project_id="1234") - - -def test_read_gbq_with_no_project_id_given_should_fail(monkeypatch): - import pydata_google_auth - - monkeypatch.setattr(pydata_google_auth, "default", mock_get_credentials_no_project) - - with pytest.raises(ValueError, match="Could not determine project ID"): - gbq.read_gbq("SELECT 1", dialect="standard") - - -def test_read_gbq_with_inferred_project_id(mock_bigquery_client): - df = gbq.read_gbq("SELECT 1", dialect="standard") - assert df is not None - mock_bigquery_client.query.assert_called_once() - - -def test_read_gbq_with_inferred_project_id_from_service_account_credentials( - mock_bigquery_client, mock_service_account_credentials -): - mock_service_account_credentials.project_id = "service_account_project_id" - df = gbq.read_gbq( - "SELECT 1", dialect="standard", credentials=mock_service_account_credentials, - ) - assert df is not None - mock_bigquery_client.query.assert_called_once_with( - "SELECT 1", - job_config=mock.ANY, - location=None, - project="service_account_project_id", - ) - - -def test_read_gbq_without_inferred_project_id_from_compute_engine_credentials( - mock_compute_engine_credentials, -): - with pytest.raises(ValueError, match="Could not determine project ID"): - gbq.read_gbq( - "SELECT 1", dialect="standard", credentials=mock_compute_engine_credentials, - ) - - -def test_read_gbq_with_max_results_zero(monkeypatch): - df = gbq.read_gbq("SELECT 1", dialect="standard", max_results=0) - assert df is None - - -def test_read_gbq_with_max_results_ten(monkeypatch, mock_bigquery_client): - df = gbq.read_gbq("SELECT 1", dialect="standard", max_results=10) - assert df is not None - mock_bigquery_client.list_rows.assert_called_with(mock.ANY, max_results=10) - - -@pytest.mark.parametrize(["verbose"], [(True,), (False,)]) -def test_read_gbq_with_verbose_new_pandas_warns_deprecation(monkeypatch, verbose): - monkeypatch.setattr( - 
type(FEATURES), - "pandas_has_deprecated_verbose", - mock.PropertyMock(return_value=True), - ) - with pytest.warns(FutureWarning, match="verbose is deprecated"): - gbq.read_gbq("SELECT 1", project_id="my-project", verbose=verbose) - - -def test_read_gbq_wo_verbose_w_new_pandas_no_warnings(monkeypatch, recwarn): - monkeypatch.setattr( - type(FEATURES), - "pandas_has_deprecated_verbose", - mock.PropertyMock(return_value=False), - ) - gbq.read_gbq("SELECT 1", project_id="my-project", dialect="standard") - assert len(recwarn) == 0 - - -def test_read_gbq_with_old_bq_raises_importerror(monkeypatch): - import google.cloud.bigquery - - monkeypatch.setattr(google.cloud.bigquery, "__version__", "0.27.0") - monkeypatch.setattr(FEATURES, "_bigquery_installed_version", None) - with pytest.raises(ImportError, match="google-cloud-bigquery"): - gbq.read_gbq( - "SELECT 1", project_id="my-project", - ) - - -def test_read_gbq_with_verbose_old_pandas_no_warnings(monkeypatch, recwarn): - monkeypatch.setattr( - type(FEATURES), - "pandas_has_deprecated_verbose", - mock.PropertyMock(return_value=False), - ) - gbq.read_gbq( - "SELECT 1", project_id="my-project", dialect="standard", verbose=True, - ) - assert len(recwarn) == 0 - - -def test_read_gbq_with_private_raises_notimplmentederror(): - with pytest.raises(NotImplementedError, match="private_key"): - gbq.read_gbq( - "SELECT 1", project_id="my-project", private_key="path/to/key.json" - ) - - -def test_read_gbq_with_invalid_dialect(): - with pytest.raises(ValueError, match="is not valid for dialect"): - gbq.read_gbq("SELECT 1", dialect="invalid") - - -def test_read_gbq_with_configuration_query(): - df = gbq.read_gbq(None, configuration={"query": {"query": "SELECT 2"}}) - assert df is not None - - -def test_read_gbq_with_configuration_duplicate_query_raises_error(): - with pytest.raises( - ValueError, match="Query statement can't be specified inside config" - ): - gbq.read_gbq("SELECT 1", configuration={"query": {"query": "SELECT 2"}}) - - def test_generate_bq_schema_deprecated(): # 11121 Deprecation of generate_bq_schema with pytest.warns(FutureWarning): df = DataFrame([[1, "two"], [3, "four"]]) gbq.generate_bq_schema(df) - - -def test_load_does_not_modify_schema_arg(mock_bigquery_client): - """Test of Issue # 277.""" - from google.api_core.exceptions import NotFound - - # Create table with new schema. 
- mock_bigquery_client.get_table.side_effect = NotFound("nope") - df = DataFrame( - { - "field1": ["a", "b"], - "field2": [1, 2], - "field3": [datetime.date(2019, 1, 1), datetime.date(2019, 5, 1)], - } - ) - original_schema = [ - {"name": "field1", "type": "STRING", "mode": "REQUIRED"}, - {"name": "field2", "type": "INTEGER"}, - {"name": "field3", "type": "DATE"}, - ] - original_schema_cp = copy.deepcopy(original_schema) - gbq.to_gbq( - df, - "dataset.schematest", - project_id="my-project", - table_schema=original_schema, - if_exists="fail", - ) - assert original_schema == original_schema_cp - - # Test again now that table exists - behavior will differ internally - # branch at if table.exists(table_id) - original_schema = [ - {"name": "field1", "type": "STRING", "mode": "REQUIRED"}, - {"name": "field2", "type": "INTEGER"}, - {"name": "field3", "type": "DATE"}, - ] - original_schema_cp = copy.deepcopy(original_schema) - gbq.to_gbq( - df, - "dataset.schematest", - project_id="my-project", - table_schema=original_schema, - if_exists="append", - ) - assert original_schema == original_schema_cp - - -def test_read_gbq_passes_dtypes(mock_bigquery_client, mock_service_account_credentials): - mock_service_account_credentials.project_id = "service_account_project_id" - df = gbq.read_gbq( - "SELECT 1 AS int_col", - dialect="standard", - credentials=mock_service_account_credentials, - dtypes={"int_col": "my-custom-dtype"}, - ) - assert df is not None - - mock_list_rows = mock_bigquery_client.list_rows("dest", max_results=100) - - _, to_dataframe_kwargs = mock_list_rows.to_dataframe.call_args - assert to_dataframe_kwargs["dtypes"] == {"int_col": "my-custom-dtype"} - - -def test_read_gbq_use_bqstorage_api( - mock_bigquery_client, mock_service_account_credentials -): - if not FEATURES.bigquery_has_bqstorage: # pragma: NO COVER - pytest.skip("requires BigQuery Storage API") - - mock_service_account_credentials.project_id = "service_account_project_id" - df = gbq.read_gbq( - "SELECT 1 AS int_col", - dialect="standard", - credentials=mock_service_account_credentials, - use_bqstorage_api=True, - ) - assert df is not None - - mock_list_rows = mock_bigquery_client.list_rows("dest", max_results=100) - mock_list_rows.to_dataframe.assert_called_once_with( - create_bqstorage_client=True, dtypes=mock.ANY, progress_bar_type=mock.ANY, - ) - - -def test_read_gbq_calls_tqdm(mock_bigquery_client, mock_service_account_credentials): - mock_service_account_credentials.project_id = "service_account_project_id" - df = gbq.read_gbq( - "SELECT 1", - dialect="standard", - credentials=mock_service_account_credentials, - progress_bar_type="foobar", - ) - assert df is not None - - mock_list_rows = mock_bigquery_client.list_rows("dest", max_results=100) - - _, to_dataframe_kwargs = mock_list_rows.to_dataframe.call_args - assert to_dataframe_kwargs["progress_bar_type"] == "foobar" - - -def test_read_gbq_with_full_table_id( - mock_bigquery_client, mock_service_account_credentials -): - mock_service_account_credentials.project_id = "service_account_project_id" - df = gbq.read_gbq( - "my-project.my_dataset.read_gbq_table", - credentials=mock_service_account_credentials, - project_id="param-project", - ) - assert df is not None - - mock_bigquery_client.query.assert_not_called() - sent_table = mock_bigquery_client.list_rows.call_args[0][0] - assert sent_table.project == "my-project" - assert sent_table.dataset_id == "my_dataset" - assert sent_table.table_id == "read_gbq_table" - - -def test_read_gbq_with_partial_table_id( - 
mock_bigquery_client, mock_service_account_credentials -): - mock_service_account_credentials.project_id = "service_account_project_id" - df = gbq.read_gbq( - "my_dataset.read_gbq_table", - credentials=mock_service_account_credentials, - project_id="param-project", - ) - assert df is not None - - mock_bigquery_client.query.assert_not_called() - sent_table = mock_bigquery_client.list_rows.call_args[0][0] - assert sent_table.project == "param-project" - assert sent_table.dataset_id == "my_dataset" - assert sent_table.table_id == "read_gbq_table" - - -def test_read_gbq_bypasses_query_with_table_id_and_max_results( - mock_bigquery_client, mock_service_account_credentials -): - mock_service_account_credentials.project_id = "service_account_project_id" - df = gbq.read_gbq( - "my-project.my_dataset.read_gbq_table", - credentials=mock_service_account_credentials, - max_results=11, - ) - assert df is not None - - mock_bigquery_client.query.assert_not_called() - sent_table = mock_bigquery_client.list_rows.call_args[0][0] - assert sent_table.project == "my-project" - assert sent_table.dataset_id == "my_dataset" - assert sent_table.table_id == "read_gbq_table" - sent_max_results = mock_bigquery_client.list_rows.call_args[1]["max_results"] - assert sent_max_results == 11 - - -def test_read_gbq_with_list_rows_error_translates_exception( - mock_bigquery_client, mock_service_account_credentials -): - mock_bigquery_client.list_rows.side_effect = ( - google.api_core.exceptions.NotFound("table not found"), - ) - - with pytest.raises(gbq.GenericGBQException, match="table not found"): - gbq.read_gbq( - "my-project.my_dataset.read_gbq_table", - credentials=mock_service_account_credentials, - ) diff --git a/tests/unit/test_read_gbq.py b/tests/unit/test_read_gbq.py new file mode 100644 index 00000000..4e5e43f7 --- /dev/null +++ b/tests/unit/test_read_gbq.py @@ -0,0 +1,260 @@ +# Copyright (c) 2021 pandas-gbq Authors All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. 
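+
+# These tests cover the table-ID fast path: passing a table ID instead of a
+# SQL query skips the query job and fetches rows via list_rows. A minimal,
+# illustrative usage sketch (the project/dataset/table names below are
+# placeholders, not a real table):
+#
+#   import pandas_gbq
+#   df = pandas_gbq.read_gbq("my-project.my_dataset.my_table", max_results=10)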
+ +from unittest import mock + +import google.api_core.exceptions +import pytest + +from pandas_gbq import gbq +from pandas_gbq.features import FEATURES + + +def mock_get_credentials_no_project(*args, **kwargs): + import google.auth.credentials + + mock_credentials = mock.create_autospec(google.auth.credentials.Credentials) + return mock_credentials, None + + +def test_read_gbq_with_no_project_id_given_should_fail(monkeypatch): + import pydata_google_auth + + monkeypatch.setattr(pydata_google_auth, "default", mock_get_credentials_no_project) + + with pytest.raises(ValueError, match="Could not determine project ID"): + gbq.read_gbq("SELECT 1", dialect="standard") + + +def test_read_gbq_with_inferred_project_id(mock_bigquery_client): + df = gbq.read_gbq("SELECT 1", dialect="standard") + assert df is not None + mock_bigquery_client.query.assert_called_once() + + +def test_read_gbq_with_inferred_project_id_from_service_account_credentials( + mock_bigquery_client, mock_service_account_credentials +): + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "SELECT 1", dialect="standard", credentials=mock_service_account_credentials, + ) + assert df is not None + mock_bigquery_client.query.assert_called_once_with( + "SELECT 1", + job_config=mock.ANY, + location=None, + project="service_account_project_id", + ) + + +def test_read_gbq_without_inferred_project_id_from_compute_engine_credentials( + mock_compute_engine_credentials, +): + with pytest.raises(ValueError, match="Could not determine project ID"): + gbq.read_gbq( + "SELECT 1", dialect="standard", credentials=mock_compute_engine_credentials, + ) + + +def test_read_gbq_with_max_results_zero(monkeypatch): + df = gbq.read_gbq("SELECT 1", dialect="standard", max_results=0) + assert df is None + + +def test_read_gbq_with_max_results_ten(monkeypatch, mock_bigquery_client): + df = gbq.read_gbq("SELECT 1", dialect="standard", max_results=10) + assert df is not None + mock_bigquery_client.list_rows.assert_called_with(mock.ANY, max_results=10) + + +@pytest.mark.parametrize(["verbose"], [(True,), (False,)]) +def test_read_gbq_with_verbose_new_pandas_warns_deprecation(monkeypatch, verbose): + monkeypatch.setattr( + type(FEATURES), + "pandas_has_deprecated_verbose", + mock.PropertyMock(return_value=True), + ) + with pytest.warns(FutureWarning, match="verbose is deprecated"): + gbq.read_gbq("SELECT 1", project_id="my-project", verbose=verbose) + + +def test_read_gbq_wo_verbose_w_new_pandas_no_warnings(monkeypatch, recwarn): + monkeypatch.setattr( + type(FEATURES), + "pandas_has_deprecated_verbose", + mock.PropertyMock(return_value=False), + ) + gbq.read_gbq("SELECT 1", project_id="my-project", dialect="standard") + assert len(recwarn) == 0 + + +def test_read_gbq_with_old_bq_raises_importerror(monkeypatch): + import google.cloud.bigquery + + monkeypatch.setattr(google.cloud.bigquery, "__version__", "0.27.0") + monkeypatch.setattr(FEATURES, "_bigquery_installed_version", None) + with pytest.raises(ImportError, match="google-cloud-bigquery"): + gbq.read_gbq( + "SELECT 1", project_id="my-project", + ) + + +def test_read_gbq_with_verbose_old_pandas_no_warnings(monkeypatch, recwarn): + monkeypatch.setattr( + type(FEATURES), + "pandas_has_deprecated_verbose", + mock.PropertyMock(return_value=False), + ) + gbq.read_gbq( + "SELECT 1", project_id="my-project", dialect="standard", verbose=True, + ) + assert len(recwarn) == 0 + + +def test_read_gbq_with_private_raises_notimplmentederror(): + with 
pytest.raises(NotImplementedError, match="private_key"): + gbq.read_gbq( + "SELECT 1", project_id="my-project", private_key="path/to/key.json" + ) + + +def test_read_gbq_with_invalid_dialect(): + with pytest.raises(ValueError, match="is not valid for dialect"): + gbq.read_gbq("SELECT 1", dialect="invalid") + + +def test_read_gbq_with_configuration_query(): + df = gbq.read_gbq(None, configuration={"query": {"query": "SELECT 2"}}) + assert df is not None + + +def test_read_gbq_with_configuration_duplicate_query_raises_error(): + with pytest.raises( + ValueError, match="Query statement can't be specified inside config" + ): + gbq.read_gbq("SELECT 1", configuration={"query": {"query": "SELECT 2"}}) + + +def test_read_gbq_passes_dtypes(mock_bigquery_client, mock_service_account_credentials): + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "SELECT 1 AS int_col", + dialect="standard", + credentials=mock_service_account_credentials, + dtypes={"int_col": "my-custom-dtype"}, + ) + assert df is not None + + mock_list_rows = mock_bigquery_client.list_rows("dest", max_results=100) + + _, to_dataframe_kwargs = mock_list_rows.to_dataframe.call_args + assert to_dataframe_kwargs["dtypes"] == {"int_col": "my-custom-dtype"} + + +def test_read_gbq_use_bqstorage_api( + mock_bigquery_client, mock_service_account_credentials +): + if not FEATURES.bigquery_has_bqstorage: # pragma: NO COVER + pytest.skip("requires BigQuery Storage API") + + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "SELECT 1 AS int_col", + dialect="standard", + credentials=mock_service_account_credentials, + use_bqstorage_api=True, + ) + assert df is not None + + mock_list_rows = mock_bigquery_client.list_rows("dest", max_results=100) + mock_list_rows.to_dataframe.assert_called_once_with( + create_bqstorage_client=True, dtypes=mock.ANY, progress_bar_type=mock.ANY, + ) + + +def test_read_gbq_calls_tqdm(mock_bigquery_client, mock_service_account_credentials): + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "SELECT 1", + dialect="standard", + credentials=mock_service_account_credentials, + progress_bar_type="foobar", + ) + assert df is not None + + mock_list_rows = mock_bigquery_client.list_rows("dest", max_results=100) + + _, to_dataframe_kwargs = mock_list_rows.to_dataframe.call_args + assert to_dataframe_kwargs["progress_bar_type"] == "foobar" + + +def test_read_gbq_with_full_table_id( + mock_bigquery_client, mock_service_account_credentials +): + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "my-project.my_dataset.read_gbq_table", + credentials=mock_service_account_credentials, + project_id="param-project", + ) + assert df is not None + + mock_bigquery_client.query.assert_not_called() + sent_table = mock_bigquery_client.list_rows.call_args[0][0] + assert sent_table.project == "my-project" + assert sent_table.dataset_id == "my_dataset" + assert sent_table.table_id == "read_gbq_table" + + +def test_read_gbq_with_partial_table_id( + mock_bigquery_client, mock_service_account_credentials +): + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "my_dataset.read_gbq_table", + credentials=mock_service_account_credentials, + project_id="param-project", + ) + assert df is not None + + mock_bigquery_client.query.assert_not_called() + sent_table = mock_bigquery_client.list_rows.call_args[0][0] + assert 
sent_table.project == "param-project" + assert sent_table.dataset_id == "my_dataset" + assert sent_table.table_id == "read_gbq_table" + + +def test_read_gbq_bypasses_query_with_table_id_and_max_results( + mock_bigquery_client, mock_service_account_credentials +): + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "my-project.my_dataset.read_gbq_table", + credentials=mock_service_account_credentials, + max_results=11, + ) + assert df is not None + + mock_bigquery_client.query.assert_not_called() + sent_table = mock_bigquery_client.list_rows.call_args[0][0] + assert sent_table.project == "my-project" + assert sent_table.dataset_id == "my_dataset" + assert sent_table.table_id == "read_gbq_table" + sent_max_results = mock_bigquery_client.list_rows.call_args[1]["max_results"] + assert sent_max_results == 11 + + +def test_read_gbq_with_list_rows_error_translates_exception( + mock_bigquery_client, mock_service_account_credentials +): + mock_bigquery_client.list_rows.side_effect = ( + google.api_core.exceptions.NotFound("table not found"), + ) + + with pytest.raises(gbq.GenericGBQException, match="table not found"): + gbq.read_gbq( + "my-project.my_dataset.read_gbq_table", + credentials=mock_service_account_credentials, + ) diff --git a/tests/unit/test_to_gbq.py b/tests/unit/test_to_gbq.py new file mode 100644 index 00000000..8c07f557 --- /dev/null +++ b/tests/unit/test_to_gbq.py @@ -0,0 +1,274 @@ +# Copyright (c) 2021 pandas-gbq Authors All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +import copy +import datetime +from unittest import mock + +from pandas import DataFrame +import pytest + +from pandas_gbq import gbq +from pandas_gbq.features import FEATURES + + +def mock_get_credentials_no_project(*args, **kwargs): + import google.auth.credentials + + mock_credentials = mock.create_autospec(google.auth.credentials.Credentials) + return mock_credentials, None + + +def test_to_gbq_should_fail_if_invalid_table_name_passed(): + with pytest.raises(gbq.NotFoundException): + gbq.to_gbq(DataFrame([[1]]), "invalid_table_name", project_id="1234") + + +def test_to_gbq_with_no_project_id_given_should_fail(monkeypatch): + import pydata_google_auth + + monkeypatch.setattr(pydata_google_auth, "default", mock_get_credentials_no_project) + + with pytest.raises(ValueError, match="Could not determine project ID"): + gbq.to_gbq(DataFrame([[1]]), "dataset.tablename") + + +@pytest.mark.parametrize( + ["api_method", "warning_message", "warning_type"], + [ + ("load_parquet", "chunksize is ignored", DeprecationWarning), + ("load_csv", "chunksize will be ignored", PendingDeprecationWarning), + ], +) +def test_to_gbq_with_chunksize_warns_deprecation( + api_method, warning_message, warning_type +): + with pytest.warns(warning_type, match=warning_message): + try: + gbq.to_gbq( + DataFrame([[1]]), + "dataset.tablename", + project_id="my-project", + api_method=api_method, + chunksize=100, + ) + except gbq.TableCreationError: + pass + + +@pytest.mark.parametrize(["verbose"], [(True,), (False,)]) +def test_to_gbq_with_verbose_new_pandas_warns_deprecation(monkeypatch, verbose): + monkeypatch.setattr( + type(FEATURES), + "pandas_has_deprecated_verbose", + mock.PropertyMock(return_value=True), + ) + with pytest.warns(FutureWarning, match="verbose is deprecated"): + try: + gbq.to_gbq( + DataFrame([[1]]), + "dataset.tablename", + project_id="my-project", + verbose=verbose, + ) + except gbq.TableCreationError: 
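+            # Ignore the expected failure from the mocked client; this test
+            # only asserts that the deprecation warning is raised.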
+ pass + + +def test_to_gbq_wo_verbose_w_new_pandas_no_warnings(monkeypatch, recwarn): + monkeypatch.setattr( + type(FEATURES), + "pandas_has_deprecated_verbose", + mock.PropertyMock(return_value=True), + ) + try: + gbq.to_gbq(DataFrame([[1]]), "dataset.tablename", project_id="my-project") + except gbq.TableCreationError: + pass + assert len(recwarn) == 0 + + +def test_to_gbq_with_verbose_old_pandas_no_warnings(monkeypatch, recwarn): + monkeypatch.setattr( + type(FEATURES), + "pandas_has_deprecated_verbose", + mock.PropertyMock(return_value=False), + ) + try: + gbq.to_gbq( + DataFrame([[1]]), + "dataset.tablename", + project_id="my-project", + verbose=True, + ) + except gbq.TableCreationError: + pass + assert len(recwarn) == 0 + + +def test_to_gbq_with_private_key_raises_notimplementederror(): + with pytest.raises(NotImplementedError, match="private_key"): + gbq.to_gbq( + DataFrame([[1]]), + "dataset.tablename", + project_id="my-project", + private_key="path/to/key.json", + ) + + +def test_to_gbq_doesnt_run_query(mock_bigquery_client): + try: + gbq.to_gbq(DataFrame([[1]]), "dataset.tablename", project_id="my-project") + except gbq.TableCreationError: + pass + + mock_bigquery_client.query.assert_not_called() + + +def test_to_gbq_w_empty_df(mock_bigquery_client): + import google.api_core.exceptions + + mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( + "my_table" + ) + gbq.to_gbq(DataFrame(), "my_dataset.my_table", project_id="1234") + mock_bigquery_client.create_table.assert_called_with(mock.ANY) + mock_bigquery_client.load_table_from_dataframe.assert_not_called() + mock_bigquery_client.load_table_from_file.assert_not_called() + + +def test_to_gbq_w_default_project(mock_bigquery_client): + """If no project is specified, we should be able to use project from + default credentials. + """ + import google.api_core.exceptions + from google.cloud.bigquery.table import TableReference + + mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( + "my_table" + ) + gbq.to_gbq(DataFrame(), "my_dataset.my_table") + + mock_bigquery_client.get_table.assert_called_with( + TableReference.from_string("default-project.my_dataset.my_table") + ) + mock_bigquery_client.create_table.assert_called_with(mock.ANY) + table = mock_bigquery_client.create_table.call_args[0][0] + assert table.project == "default-project" + + +def test_to_gbq_w_project_table(mock_bigquery_client): + """If a project is included in the table ID, use that instead of the client + project. 
See: https://github.com/pydata/pandas-gbq/issues/321 + """ + import google.api_core.exceptions + from google.cloud.bigquery.table import TableReference + + mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( + "my_table" + ) + gbq.to_gbq( + DataFrame(), "project_table.my_dataset.my_table", project_id="project_client", + ) + + mock_bigquery_client.get_table.assert_called_with( + TableReference.from_string("project_table.my_dataset.my_table") + ) + mock_bigquery_client.create_table.assert_called_with(mock.ANY) + table = mock_bigquery_client.create_table.call_args[0][0] + assert table.project == "project_table" + + +def test_to_gbq_create_dataset(mock_bigquery_client): + import google.api_core.exceptions + + mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( + "my_table" + ) + mock_bigquery_client.get_dataset.side_effect = google.api_core.exceptions.NotFound( + "my_dataset" + ) + gbq.to_gbq(DataFrame([[1]]), "my_dataset.my_table", project_id="1234") + mock_bigquery_client.create_dataset.assert_called_with(mock.ANY) + + +def test_to_gbq_create_dataset_with_location(mock_bigquery_client): + import google.api_core.exceptions + + mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( + "my_table" + ) + mock_bigquery_client.get_dataset.side_effect = google.api_core.exceptions.NotFound( + "my_dataset" + ) + gbq.to_gbq( + DataFrame([[1]]), "my_dataset.my_table", project_id="1234", location="us-west1" + ) + assert mock_bigquery_client.create_dataset.called + args, _ = mock_bigquery_client.create_dataset.call_args + sent_dataset = args[0] + assert sent_dataset.location == "us-west1" + + +def test_to_gbq_create_dataset_translates_exception(mock_bigquery_client): + import google.api_core.exceptions + + mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( + "my_table" + ) + mock_bigquery_client.get_dataset.side_effect = google.api_core.exceptions.NotFound( + "my_dataset" + ) + mock_bigquery_client.create_dataset.side_effect = google.api_core.exceptions.InternalServerError( + "something went wrong" + ) + + with pytest.raises(gbq.GenericGBQException): + gbq.to_gbq(DataFrame([[1]]), "my_dataset.my_table", project_id="1234") + + +def test_to_gbq_does_not_modify_schema_arg(mock_bigquery_client): + """Test of Issue # 277.""" + from google.api_core.exceptions import NotFound + + # Create table with new schema. 
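+    # NotFound from get_table makes to_gbq treat the table as missing, so it
+    # takes the table-creation code path.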
+ mock_bigquery_client.get_table.side_effect = NotFound("nope") + df = DataFrame( + { + "field1": ["a", "b"], + "field2": [1, 2], + "field3": [datetime.date(2019, 1, 1), datetime.date(2019, 5, 1)], + } + ) + original_schema = [ + {"name": "field1", "type": "STRING", "mode": "REQUIRED"}, + {"name": "field2", "type": "INTEGER"}, + {"name": "field3", "type": "DATE"}, + ] + original_schema_cp = copy.deepcopy(original_schema) + gbq.to_gbq( + df, + "dataset.schematest", + project_id="my-project", + table_schema=original_schema, + if_exists="fail", + ) + assert original_schema == original_schema_cp + + # Test again now that table exists - behavior will differ internally + # branch at if table.exists(table_id) + original_schema = [ + {"name": "field1", "type": "STRING", "mode": "REQUIRED"}, + {"name": "field2", "type": "INTEGER"}, + {"name": "field3", "type": "DATE"}, + ] + original_schema_cp = copy.deepcopy(original_schema) + gbq.to_gbq( + df, + "dataset.schematest", + project_id="my-project", + table_schema=original_schema, + if_exists="append", + ) + assert original_schema == original_schema_cp From d97102e1c9f7bc88d19b5ed11b1141d6718193dc Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 22 Dec 2021 11:13:32 -0600 Subject: [PATCH 24/37] less intense refactoring --- pandas_gbq/gbq.py | 4 +- tests/unit/test_auth.py | 15 +- tests/unit/test_gbq.py | 481 ++++++++++++++++++++++++++++++++++++ tests/unit/test_read_gbq.py | 260 ------------------- tests/unit/test_to_gbq.py | 61 ----- 5 files changed, 492 insertions(+), 329 deletions(-) delete mode 100644 tests/unit/test_read_gbq.py diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index b557ccc1..cc6bef1f 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -1355,7 +1355,9 @@ def create(self, dataset_id): from google.cloud.bigquery import Dataset if self.exists(dataset_id): - raise DatasetCreationError("Dataset {0} already exists".format(dataset_id)) + raise DatasetCreationError( + "Dataset {0} already " "exists".format(dataset_id) + ) dataset = Dataset(self._dataset_ref(dataset_id)) diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py index 2fa9b828..d44c6380 100644 --- a/tests/unit/test_auth.py +++ b/tests/unit/test_auth.py @@ -28,23 +28,24 @@ def test_get_credentials_default_credentials(monkeypatch): import google.auth import google.auth.credentials import google.cloud.bigquery + import pydata_google_auth - def mock_default_credentials(scopes=None, request=None): - return ( - mock.create_autospec(google.auth.credentials.Credentials), - "default-project", - ) + mock_user_credentials = mock.create_autospec(google.auth.credentials.Credentials) + + def mock_default_credentials(scopes, **kwargs): + return (mock_user_credentials, "test-project") - monkeypatch.setattr(google.auth, "default", mock_default_credentials) + monkeypatch.setattr(pydata_google_auth, "default", mock_default_credentials) credentials, project = auth.get_credentials() - assert project == "default-project" + assert project == "test-project" assert credentials is not None def test_get_credentials_load_user_no_default(monkeypatch): import google.auth import google.auth.credentials + import pydata_google_auth import pydata_google_auth.cache mock_user_credentials = mock.create_autospec(google.auth.credentials.Credentials) diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index d41d2ad0..0f551c27 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -4,8 +4,11 @@ # -*- coding: utf-8 -*- +import copy +import datetime from unittest import mock 
+import google.api_core.exceptions import numpy import pandas from pandas import DataFrame @@ -97,8 +100,486 @@ def test_GbqConnector_get_client_w_new_bq(mock_bigquery_client): assert kwargs["client_info"].user_agent == "pandas-{}".format(pandas.__version__) +def test_to_gbq_should_fail_if_invalid_table_name_passed(): + with pytest.raises(gbq.NotFoundException): + gbq.to_gbq(DataFrame([[1]]), "invalid_table_name", project_id="1234") + + +def test_to_gbq_with_no_project_id_given_should_fail(monkeypatch): + import pydata_google_auth + + monkeypatch.setattr(pydata_google_auth, "default", mock_get_credentials_no_project) + + with pytest.raises(ValueError, match="Could not determine project ID"): + gbq.to_gbq(DataFrame([[1]]), "dataset.tablename") + + +@pytest.mark.parametrize( + ["api_method", "warning_message", "warning_type"], + [ + ("load_parquet", "chunksize is ignored", DeprecationWarning), + ("load_csv", "chunksize will be ignored", PendingDeprecationWarning), + ], +) +def test_to_gbq_with_chunksize_warns_deprecation( + api_method, warning_message, warning_type +): + with pytest.warns(warning_type, match=warning_message): + try: + gbq.to_gbq( + DataFrame([[1]]), + "dataset.tablename", + project_id="my-project", + api_method=api_method, + chunksize=100, + ) + except gbq.TableCreationError: + pass + + +@pytest.mark.parametrize(["verbose"], [(True,), (False,)]) +def test_to_gbq_with_verbose_new_pandas_warns_deprecation(monkeypatch, verbose): + monkeypatch.setattr( + type(FEATURES), + "pandas_has_deprecated_verbose", + mock.PropertyMock(return_value=True), + ) + with pytest.warns(FutureWarning, match="verbose is deprecated"): + try: + gbq.to_gbq( + DataFrame([[1]]), + "dataset.tablename", + project_id="my-project", + verbose=verbose, + ) + except gbq.TableCreationError: + pass + + +def test_to_gbq_wo_verbose_w_new_pandas_no_warnings(monkeypatch, recwarn): + monkeypatch.setattr( + type(FEATURES), + "pandas_has_deprecated_verbose", + mock.PropertyMock(return_value=True), + ) + try: + gbq.to_gbq(DataFrame([[1]]), "dataset.tablename", project_id="my-project") + except gbq.TableCreationError: + pass + assert len(recwarn) == 0 + + +def test_to_gbq_with_verbose_old_pandas_no_warnings(monkeypatch, recwarn): + monkeypatch.setattr( + type(FEATURES), + "pandas_has_deprecated_verbose", + mock.PropertyMock(return_value=False), + ) + try: + gbq.to_gbq( + DataFrame([[1]]), + "dataset.tablename", + project_id="my-project", + verbose=True, + ) + except gbq.TableCreationError: + pass + assert len(recwarn) == 0 + + +def test_to_gbq_with_private_key_raises_notimplementederror(): + with pytest.raises(NotImplementedError, match="private_key"): + gbq.to_gbq( + DataFrame([[1]]), + "dataset.tablename", + project_id="my-project", + private_key="path/to/key.json", + ) + + +def test_to_gbq_doesnt_run_query(mock_bigquery_client): + try: + gbq.to_gbq(DataFrame([[1]]), "dataset.tablename", project_id="my-project") + except gbq.TableCreationError: + pass + + mock_bigquery_client.query.assert_not_called() + + +def test_to_gbq_w_empty_df(mock_bigquery_client): + import google.api_core.exceptions + + mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( + "my_table" + ) + gbq.to_gbq(DataFrame(), "my_dataset.my_table", project_id="1234") + mock_bigquery_client.create_table.assert_called_with(mock.ANY) + mock_bigquery_client.load_table_from_dataframe.assert_not_called() + mock_bigquery_client.load_table_from_file.assert_not_called() + + +def test_to_gbq_w_default_project(mock_bigquery_client): + 
"""If no project is specified, we should be able to use project from + default credentials. + """ + import google.api_core.exceptions + from google.cloud.bigquery.table import TableReference + + mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( + "my_table" + ) + gbq.to_gbq(DataFrame(), "my_dataset.my_table") + + mock_bigquery_client.get_table.assert_called_with( + TableReference.from_string("default-project.my_dataset.my_table") + ) + mock_bigquery_client.create_table.assert_called_with(mock.ANY) + table = mock_bigquery_client.create_table.call_args[0][0] + assert table.project == "default-project" + + +def test_to_gbq_w_project_table(mock_bigquery_client): + """If a project is included in the table ID, use that instead of the client + project. See: https://github.com/pydata/pandas-gbq/issues/321 + """ + import google.api_core.exceptions + from google.cloud.bigquery.table import TableReference + + mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( + "my_table" + ) + gbq.to_gbq( + DataFrame(), "project_table.my_dataset.my_table", project_id="project_client", + ) + + mock_bigquery_client.get_table.assert_called_with( + TableReference.from_string("project_table.my_dataset.my_table") + ) + mock_bigquery_client.create_table.assert_called_with(mock.ANY) + table = mock_bigquery_client.create_table.call_args[0][0] + assert table.project == "project_table" + + +def test_to_gbq_create_dataset(mock_bigquery_client): + import google.api_core.exceptions + + mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( + "my_table" + ) + mock_bigquery_client.get_dataset.side_effect = google.api_core.exceptions.NotFound( + "my_dataset" + ) + gbq.to_gbq(DataFrame([[1]]), "my_dataset.my_table", project_id="1234") + mock_bigquery_client.create_dataset.assert_called_with(mock.ANY) + + +def test_to_gbq_create_dataset_translates_exception(mock_bigquery_client): + import google.api_core.exceptions + + mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( + "my_table" + ) + mock_bigquery_client.get_dataset.side_effect = google.api_core.exceptions.NotFound( + "my_dataset" + ) + mock_bigquery_client.create_dataset.side_effect = google.api_core.exceptions.InternalServerError( + "something went wrong" + ) + + with pytest.raises(gbq.GenericGBQException): + gbq.to_gbq(DataFrame([[1]]), "my_dataset.my_table", project_id="1234") + + +def test_read_gbq_with_no_project_id_given_should_fail(monkeypatch): + import pydata_google_auth + + monkeypatch.setattr(pydata_google_auth, "default", mock_get_credentials_no_project) + + with pytest.raises(ValueError, match="Could not determine project ID"): + gbq.read_gbq("SELECT 1", dialect="standard") + + +def test_read_gbq_with_inferred_project_id(mock_bigquery_client): + df = gbq.read_gbq("SELECT 1", dialect="standard") + assert df is not None + mock_bigquery_client.query.assert_called_once() + + +def test_read_gbq_with_inferred_project_id_from_service_account_credentials( + mock_bigquery_client, mock_service_account_credentials +): + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "SELECT 1", dialect="standard", credentials=mock_service_account_credentials, + ) + assert df is not None + mock_bigquery_client.query.assert_called_once_with( + "SELECT 1", + job_config=mock.ANY, + location=None, + project="service_account_project_id", + ) + + +def test_read_gbq_without_inferred_project_id_from_compute_engine_credentials( + 
mock_compute_engine_credentials, +): + with pytest.raises(ValueError, match="Could not determine project ID"): + gbq.read_gbq( + "SELECT 1", dialect="standard", credentials=mock_compute_engine_credentials, + ) + + +def test_read_gbq_with_max_results_zero(monkeypatch): + df = gbq.read_gbq("SELECT 1", dialect="standard", max_results=0) + assert df is None + + +def test_read_gbq_with_max_results_ten(monkeypatch, mock_bigquery_client): + df = gbq.read_gbq("SELECT 1", dialect="standard", max_results=10) + assert df is not None + mock_bigquery_client.list_rows.assert_called_with(mock.ANY, max_results=10) + + +@pytest.mark.parametrize(["verbose"], [(True,), (False,)]) +def test_read_gbq_with_verbose_new_pandas_warns_deprecation(monkeypatch, verbose): + monkeypatch.setattr( + type(FEATURES), + "pandas_has_deprecated_verbose", + mock.PropertyMock(return_value=True), + ) + with pytest.warns(FutureWarning, match="verbose is deprecated"): + gbq.read_gbq("SELECT 1", project_id="my-project", verbose=verbose) + + +def test_read_gbq_wo_verbose_w_new_pandas_no_warnings(monkeypatch, recwarn): + monkeypatch.setattr( + type(FEATURES), + "pandas_has_deprecated_verbose", + mock.PropertyMock(return_value=False), + ) + gbq.read_gbq("SELECT 1", project_id="my-project", dialect="standard") + assert len(recwarn) == 0 + + +def test_read_gbq_with_old_bq_raises_importerror(monkeypatch): + import google.cloud.bigquery + + monkeypatch.setattr(google.cloud.bigquery, "__version__", "0.27.0") + monkeypatch.setattr(FEATURES, "_bigquery_installed_version", None) + with pytest.raises(ImportError, match="google-cloud-bigquery"): + gbq.read_gbq( + "SELECT 1", project_id="my-project", + ) + + +def test_read_gbq_with_verbose_old_pandas_no_warnings(monkeypatch, recwarn): + monkeypatch.setattr( + type(FEATURES), + "pandas_has_deprecated_verbose", + mock.PropertyMock(return_value=False), + ) + gbq.read_gbq( + "SELECT 1", project_id="my-project", dialect="standard", verbose=True, + ) + assert len(recwarn) == 0 + + +def test_read_gbq_with_private_raises_notimplmentederror(): + with pytest.raises(NotImplementedError, match="private_key"): + gbq.read_gbq( + "SELECT 1", project_id="my-project", private_key="path/to/key.json" + ) + + +def test_read_gbq_with_invalid_dialect(): + with pytest.raises(ValueError, match="is not valid for dialect"): + gbq.read_gbq("SELECT 1", dialect="invalid") + + +def test_read_gbq_with_configuration_query(): + df = gbq.read_gbq(None, configuration={"query": {"query": "SELECT 2"}}) + assert df is not None + + +def test_read_gbq_with_configuration_duplicate_query_raises_error(): + with pytest.raises( + ValueError, match="Query statement can't be specified inside config" + ): + gbq.read_gbq("SELECT 1", configuration={"query": {"query": "SELECT 2"}}) + + def test_generate_bq_schema_deprecated(): # 11121 Deprecation of generate_bq_schema with pytest.warns(FutureWarning): df = DataFrame([[1, "two"], [3, "four"]]) gbq.generate_bq_schema(df) + + +def test_load_does_not_modify_schema_arg(mock_bigquery_client): + """Test of Issue # 277.""" + from google.api_core.exceptions import NotFound + + # Create table with new schema. 
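+    # get_table raising NotFound forces the create-table branch on the first
+    # call; the second to_gbq call below exercises the table-exists branch.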
+ mock_bigquery_client.get_table.side_effect = NotFound("nope") + df = DataFrame( + { + "field1": ["a", "b"], + "field2": [1, 2], + "field3": [datetime.date(2019, 1, 1), datetime.date(2019, 5, 1)], + } + ) + original_schema = [ + {"name": "field1", "type": "STRING", "mode": "REQUIRED"}, + {"name": "field2", "type": "INTEGER"}, + {"name": "field3", "type": "DATE"}, + ] + original_schema_cp = copy.deepcopy(original_schema) + gbq.to_gbq( + df, + "dataset.schematest", + project_id="my-project", + table_schema=original_schema, + if_exists="fail", + ) + assert original_schema == original_schema_cp + + # Test again now that table exists - behavior will differ internally + # branch at if table.exists(table_id) + original_schema = [ + {"name": "field1", "type": "STRING", "mode": "REQUIRED"}, + {"name": "field2", "type": "INTEGER"}, + {"name": "field3", "type": "DATE"}, + ] + original_schema_cp = copy.deepcopy(original_schema) + gbq.to_gbq( + df, + "dataset.schematest", + project_id="my-project", + table_schema=original_schema, + if_exists="append", + ) + assert original_schema == original_schema_cp + + +def test_read_gbq_passes_dtypes(mock_bigquery_client, mock_service_account_credentials): + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "SELECT 1 AS int_col", + dialect="standard", + credentials=mock_service_account_credentials, + dtypes={"int_col": "my-custom-dtype"}, + ) + assert df is not None + + mock_list_rows = mock_bigquery_client.list_rows("dest", max_results=100) + + _, to_dataframe_kwargs = mock_list_rows.to_dataframe.call_args + assert to_dataframe_kwargs["dtypes"] == {"int_col": "my-custom-dtype"} + + +def test_read_gbq_use_bqstorage_api( + mock_bigquery_client, mock_service_account_credentials +): + if not FEATURES.bigquery_has_bqstorage: # pragma: NO COVER + pytest.skip("requires BigQuery Storage API") + + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "SELECT 1 AS int_col", + dialect="standard", + credentials=mock_service_account_credentials, + use_bqstorage_api=True, + ) + assert df is not None + + mock_list_rows = mock_bigquery_client.list_rows("dest", max_results=100) + mock_list_rows.to_dataframe.assert_called_once_with( + create_bqstorage_client=True, dtypes=mock.ANY, progress_bar_type=mock.ANY, + ) + + +def test_read_gbq_calls_tqdm(mock_bigquery_client, mock_service_account_credentials): + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "SELECT 1", + dialect="standard", + credentials=mock_service_account_credentials, + progress_bar_type="foobar", + ) + assert df is not None + + mock_list_rows = mock_bigquery_client.list_rows("dest", max_results=100) + + _, to_dataframe_kwargs = mock_list_rows.to_dataframe.call_args + assert to_dataframe_kwargs["progress_bar_type"] == "foobar" + + +def test_read_gbq_with_full_table_id( + mock_bigquery_client, mock_service_account_credentials +): + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "my-project.my_dataset.read_gbq_table", + credentials=mock_service_account_credentials, + project_id="param-project", + ) + assert df is not None + + mock_bigquery_client.query.assert_not_called() + sent_table = mock_bigquery_client.list_rows.call_args[0][0] + assert sent_table.project == "my-project" + assert sent_table.dataset_id == "my_dataset" + assert sent_table.table_id == "read_gbq_table" + + +def test_read_gbq_with_partial_table_id( + 
mock_bigquery_client, mock_service_account_credentials +): + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "my_dataset.read_gbq_table", + credentials=mock_service_account_credentials, + project_id="param-project", + ) + assert df is not None + + mock_bigquery_client.query.assert_not_called() + sent_table = mock_bigquery_client.list_rows.call_args[0][0] + assert sent_table.project == "param-project" + assert sent_table.dataset_id == "my_dataset" + assert sent_table.table_id == "read_gbq_table" + + +def test_read_gbq_bypasses_query_with_table_id_and_max_results( + mock_bigquery_client, mock_service_account_credentials +): + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "my-project.my_dataset.read_gbq_table", + credentials=mock_service_account_credentials, + max_results=11, + ) + assert df is not None + + mock_bigquery_client.query.assert_not_called() + sent_table = mock_bigquery_client.list_rows.call_args[0][0] + assert sent_table.project == "my-project" + assert sent_table.dataset_id == "my_dataset" + assert sent_table.table_id == "read_gbq_table" + sent_max_results = mock_bigquery_client.list_rows.call_args[1]["max_results"] + assert sent_max_results == 11 + + +def test_read_gbq_with_list_rows_error_translates_exception( + mock_bigquery_client, mock_service_account_credentials +): + mock_bigquery_client.list_rows.side_effect = ( + google.api_core.exceptions.NotFound("table not found"), + ) + + with pytest.raises(gbq.GenericGBQException, match="table not found"): + gbq.read_gbq( + "my-project.my_dataset.read_gbq_table", + credentials=mock_service_account_credentials, + ) diff --git a/tests/unit/test_read_gbq.py b/tests/unit/test_read_gbq.py deleted file mode 100644 index 4e5e43f7..00000000 --- a/tests/unit/test_read_gbq.py +++ /dev/null @@ -1,260 +0,0 @@ -# Copyright (c) 2021 pandas-gbq Authors All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. 
- -from unittest import mock - -import google.api_core.exceptions -import pytest - -from pandas_gbq import gbq -from pandas_gbq.features import FEATURES - - -def mock_get_credentials_no_project(*args, **kwargs): - import google.auth.credentials - - mock_credentials = mock.create_autospec(google.auth.credentials.Credentials) - return mock_credentials, None - - -def test_read_gbq_with_no_project_id_given_should_fail(monkeypatch): - import pydata_google_auth - - monkeypatch.setattr(pydata_google_auth, "default", mock_get_credentials_no_project) - - with pytest.raises(ValueError, match="Could not determine project ID"): - gbq.read_gbq("SELECT 1", dialect="standard") - - -def test_read_gbq_with_inferred_project_id(mock_bigquery_client): - df = gbq.read_gbq("SELECT 1", dialect="standard") - assert df is not None - mock_bigquery_client.query.assert_called_once() - - -def test_read_gbq_with_inferred_project_id_from_service_account_credentials( - mock_bigquery_client, mock_service_account_credentials -): - mock_service_account_credentials.project_id = "service_account_project_id" - df = gbq.read_gbq( - "SELECT 1", dialect="standard", credentials=mock_service_account_credentials, - ) - assert df is not None - mock_bigquery_client.query.assert_called_once_with( - "SELECT 1", - job_config=mock.ANY, - location=None, - project="service_account_project_id", - ) - - -def test_read_gbq_without_inferred_project_id_from_compute_engine_credentials( - mock_compute_engine_credentials, -): - with pytest.raises(ValueError, match="Could not determine project ID"): - gbq.read_gbq( - "SELECT 1", dialect="standard", credentials=mock_compute_engine_credentials, - ) - - -def test_read_gbq_with_max_results_zero(monkeypatch): - df = gbq.read_gbq("SELECT 1", dialect="standard", max_results=0) - assert df is None - - -def test_read_gbq_with_max_results_ten(monkeypatch, mock_bigquery_client): - df = gbq.read_gbq("SELECT 1", dialect="standard", max_results=10) - assert df is not None - mock_bigquery_client.list_rows.assert_called_with(mock.ANY, max_results=10) - - -@pytest.mark.parametrize(["verbose"], [(True,), (False,)]) -def test_read_gbq_with_verbose_new_pandas_warns_deprecation(monkeypatch, verbose): - monkeypatch.setattr( - type(FEATURES), - "pandas_has_deprecated_verbose", - mock.PropertyMock(return_value=True), - ) - with pytest.warns(FutureWarning, match="verbose is deprecated"): - gbq.read_gbq("SELECT 1", project_id="my-project", verbose=verbose) - - -def test_read_gbq_wo_verbose_w_new_pandas_no_warnings(monkeypatch, recwarn): - monkeypatch.setattr( - type(FEATURES), - "pandas_has_deprecated_verbose", - mock.PropertyMock(return_value=False), - ) - gbq.read_gbq("SELECT 1", project_id="my-project", dialect="standard") - assert len(recwarn) == 0 - - -def test_read_gbq_with_old_bq_raises_importerror(monkeypatch): - import google.cloud.bigquery - - monkeypatch.setattr(google.cloud.bigquery, "__version__", "0.27.0") - monkeypatch.setattr(FEATURES, "_bigquery_installed_version", None) - with pytest.raises(ImportError, match="google-cloud-bigquery"): - gbq.read_gbq( - "SELECT 1", project_id="my-project", - ) - - -def test_read_gbq_with_verbose_old_pandas_no_warnings(monkeypatch, recwarn): - monkeypatch.setattr( - type(FEATURES), - "pandas_has_deprecated_verbose", - mock.PropertyMock(return_value=False), - ) - gbq.read_gbq( - "SELECT 1", project_id="my-project", dialect="standard", verbose=True, - ) - assert len(recwarn) == 0 - - -def test_read_gbq_with_private_raises_notimplmentederror(): - with 
pytest.raises(NotImplementedError, match="private_key"): - gbq.read_gbq( - "SELECT 1", project_id="my-project", private_key="path/to/key.json" - ) - - -def test_read_gbq_with_invalid_dialect(): - with pytest.raises(ValueError, match="is not valid for dialect"): - gbq.read_gbq("SELECT 1", dialect="invalid") - - -def test_read_gbq_with_configuration_query(): - df = gbq.read_gbq(None, configuration={"query": {"query": "SELECT 2"}}) - assert df is not None - - -def test_read_gbq_with_configuration_duplicate_query_raises_error(): - with pytest.raises( - ValueError, match="Query statement can't be specified inside config" - ): - gbq.read_gbq("SELECT 1", configuration={"query": {"query": "SELECT 2"}}) - - -def test_read_gbq_passes_dtypes(mock_bigquery_client, mock_service_account_credentials): - mock_service_account_credentials.project_id = "service_account_project_id" - df = gbq.read_gbq( - "SELECT 1 AS int_col", - dialect="standard", - credentials=mock_service_account_credentials, - dtypes={"int_col": "my-custom-dtype"}, - ) - assert df is not None - - mock_list_rows = mock_bigquery_client.list_rows("dest", max_results=100) - - _, to_dataframe_kwargs = mock_list_rows.to_dataframe.call_args - assert to_dataframe_kwargs["dtypes"] == {"int_col": "my-custom-dtype"} - - -def test_read_gbq_use_bqstorage_api( - mock_bigquery_client, mock_service_account_credentials -): - if not FEATURES.bigquery_has_bqstorage: # pragma: NO COVER - pytest.skip("requires BigQuery Storage API") - - mock_service_account_credentials.project_id = "service_account_project_id" - df = gbq.read_gbq( - "SELECT 1 AS int_col", - dialect="standard", - credentials=mock_service_account_credentials, - use_bqstorage_api=True, - ) - assert df is not None - - mock_list_rows = mock_bigquery_client.list_rows("dest", max_results=100) - mock_list_rows.to_dataframe.assert_called_once_with( - create_bqstorage_client=True, dtypes=mock.ANY, progress_bar_type=mock.ANY, - ) - - -def test_read_gbq_calls_tqdm(mock_bigquery_client, mock_service_account_credentials): - mock_service_account_credentials.project_id = "service_account_project_id" - df = gbq.read_gbq( - "SELECT 1", - dialect="standard", - credentials=mock_service_account_credentials, - progress_bar_type="foobar", - ) - assert df is not None - - mock_list_rows = mock_bigquery_client.list_rows("dest", max_results=100) - - _, to_dataframe_kwargs = mock_list_rows.to_dataframe.call_args - assert to_dataframe_kwargs["progress_bar_type"] == "foobar" - - -def test_read_gbq_with_full_table_id( - mock_bigquery_client, mock_service_account_credentials -): - mock_service_account_credentials.project_id = "service_account_project_id" - df = gbq.read_gbq( - "my-project.my_dataset.read_gbq_table", - credentials=mock_service_account_credentials, - project_id="param-project", - ) - assert df is not None - - mock_bigquery_client.query.assert_not_called() - sent_table = mock_bigquery_client.list_rows.call_args[0][0] - assert sent_table.project == "my-project" - assert sent_table.dataset_id == "my_dataset" - assert sent_table.table_id == "read_gbq_table" - - -def test_read_gbq_with_partial_table_id( - mock_bigquery_client, mock_service_account_credentials -): - mock_service_account_credentials.project_id = "service_account_project_id" - df = gbq.read_gbq( - "my_dataset.read_gbq_table", - credentials=mock_service_account_credentials, - project_id="param-project", - ) - assert df is not None - - mock_bigquery_client.query.assert_not_called() - sent_table = mock_bigquery_client.list_rows.call_args[0][0] - assert 
sent_table.project == "param-project" - assert sent_table.dataset_id == "my_dataset" - assert sent_table.table_id == "read_gbq_table" - - -def test_read_gbq_bypasses_query_with_table_id_and_max_results( - mock_bigquery_client, mock_service_account_credentials -): - mock_service_account_credentials.project_id = "service_account_project_id" - df = gbq.read_gbq( - "my-project.my_dataset.read_gbq_table", - credentials=mock_service_account_credentials, - max_results=11, - ) - assert df is not None - - mock_bigquery_client.query.assert_not_called() - sent_table = mock_bigquery_client.list_rows.call_args[0][0] - assert sent_table.project == "my-project" - assert sent_table.dataset_id == "my_dataset" - assert sent_table.table_id == "read_gbq_table" - sent_max_results = mock_bigquery_client.list_rows.call_args[1]["max_results"] - assert sent_max_results == 11 - - -def test_read_gbq_with_list_rows_error_translates_exception( - mock_bigquery_client, mock_service_account_credentials -): - mock_bigquery_client.list_rows.side_effect = ( - google.api_core.exceptions.NotFound("table not found"), - ) - - with pytest.raises(gbq.GenericGBQException, match="table not found"): - gbq.read_gbq( - "my-project.my_dataset.read_gbq_table", - credentials=mock_service_account_credentials, - ) diff --git a/tests/unit/test_to_gbq.py b/tests/unit/test_to_gbq.py index 8c07f557..abae1b15 100644 --- a/tests/unit/test_to_gbq.py +++ b/tests/unit/test_to_gbq.py @@ -2,8 +2,6 @@ # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. -import copy -import datetime from unittest import mock from pandas import DataFrame @@ -180,19 +178,6 @@ def test_to_gbq_w_project_table(mock_bigquery_client): assert table.project == "project_table" -def test_to_gbq_create_dataset(mock_bigquery_client): - import google.api_core.exceptions - - mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( - "my_table" - ) - mock_bigquery_client.get_dataset.side_effect = google.api_core.exceptions.NotFound( - "my_dataset" - ) - gbq.to_gbq(DataFrame([[1]]), "my_dataset.my_table", project_id="1234") - mock_bigquery_client.create_dataset.assert_called_with(mock.ANY) - - def test_to_gbq_create_dataset_with_location(mock_bigquery_client): import google.api_core.exceptions @@ -226,49 +211,3 @@ def test_to_gbq_create_dataset_translates_exception(mock_bigquery_client): with pytest.raises(gbq.GenericGBQException): gbq.to_gbq(DataFrame([[1]]), "my_dataset.my_table", project_id="1234") - - -def test_to_gbq_does_not_modify_schema_arg(mock_bigquery_client): - """Test of Issue # 277.""" - from google.api_core.exceptions import NotFound - - # Create table with new schema. 
- mock_bigquery_client.get_table.side_effect = NotFound("nope") - df = DataFrame( - { - "field1": ["a", "b"], - "field2": [1, 2], - "field3": [datetime.date(2019, 1, 1), datetime.date(2019, 5, 1)], - } - ) - original_schema = [ - {"name": "field1", "type": "STRING", "mode": "REQUIRED"}, - {"name": "field2", "type": "INTEGER"}, - {"name": "field3", "type": "DATE"}, - ] - original_schema_cp = copy.deepcopy(original_schema) - gbq.to_gbq( - df, - "dataset.schematest", - project_id="my-project", - table_schema=original_schema, - if_exists="fail", - ) - assert original_schema == original_schema_cp - - # Test again now that table exists - behavior will differ internally - # branch at if table.exists(table_id) - original_schema = [ - {"name": "field1", "type": "STRING", "mode": "REQUIRED"}, - {"name": "field2", "type": "INTEGER"}, - {"name": "field3", "type": "DATE"}, - ] - original_schema_cp = copy.deepcopy(original_schema) - gbq.to_gbq( - df, - "dataset.schematest", - project_id="my-project", - table_schema=original_schema, - if_exists="append", - ) - assert original_schema == original_schema_cp From c48f9970fb148b2db7060263c3b55ca87111f412 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 22 Dec 2021 14:10:14 -0600 Subject: [PATCH 25/37] more refactor cleanup --- pandas_gbq/schema.py | 2 -- tests/unit/test_gbq.py | 17 ----------------- tests/unit/test_load.py | 3 ++- 3 files changed, 2 insertions(+), 20 deletions(-) diff --git a/pandas_gbq/schema.py b/pandas_gbq/schema.py index 5877ddfb..cfa1c765 100644 --- a/pandas_gbq/schema.py +++ b/pandas_gbq/schema.py @@ -113,8 +113,6 @@ def generate_bq_schema(dataframe, default_type="STRING"): "S": "STRING", "U": "STRING", "M": "TIMESTAMP", - # TODO: Disambiguate TIMESTAMP from DATETIME based on if column is - # localized. } fields = [] diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 0f551c27..22927885 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -273,23 +273,6 @@ def test_to_gbq_create_dataset(mock_bigquery_client): mock_bigquery_client.create_dataset.assert_called_with(mock.ANY) -def test_to_gbq_create_dataset_translates_exception(mock_bigquery_client): - import google.api_core.exceptions - - mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( - "my_table" - ) - mock_bigquery_client.get_dataset.side_effect = google.api_core.exceptions.NotFound( - "my_dataset" - ) - mock_bigquery_client.create_dataset.side_effect = google.api_core.exceptions.InternalServerError( - "something went wrong" - ) - - with pytest.raises(gbq.GenericGBQException): - gbq.to_gbq(DataFrame([[1]]), "my_dataset.my_table", project_id="1234") - - def test_read_gbq_with_no_project_id_given_should_fail(monkeypatch): import pydata_google_auth diff --git a/tests/unit/test_load.py b/tests/unit/test_load.py index 9f1b685b..b35c735c 100644 --- a/tests/unit/test_load.py +++ b/tests/unit/test_load.py @@ -166,7 +166,8 @@ def test_load_csv_from_file_generates_schema(mock_bigquery_client): assert sent_schema[3].name == "string_col" assert sent_schema[3].field_type == "STRING" # TODO: Disambiguate TIMESTAMP from DATETIME based on if column is - # localized. + # localized or at least use field type from table metadata. 
See: + # https://github.com/googleapis/python-bigquery-pandas/issues/450 assert sent_schema[4].name == "datetime_col" assert sent_schema[4].field_type == "TIMESTAMP" assert sent_schema[5].name == "timestamp_col" From 9e671383395712c1de15cb4a86176314d09464d8 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 22 Dec 2021 14:25:50 -0600 Subject: [PATCH 26/37] more tests --- tests/unit/test_gbq.py | 34 ++++++++ tests/unit/test_to_gbq.py | 161 -------------------------------------- 2 files changed, 34 insertions(+), 161 deletions(-) diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 22927885..8f5da082 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -273,6 +273,40 @@ def test_to_gbq_create_dataset(mock_bigquery_client): mock_bigquery_client.create_dataset.assert_called_with(mock.ANY) +def test_dataset_create_already_exists_translates_exception(mock_bigquery_client): + dataset_connector = gbq._Dataset("my-project") + dataset_connector.client = mock_bigquery_client + mock_bigquery_client.get_dataset.return_value = object() + with pytest.raises(gbq.DatasetCreationError): + dataset_connector.create("already_exists") + + +def test_dataset_exists_false(mock_bigquery_client): + dataset_connector = gbq._Dataset("my-project") + dataset_connector.client = mock_bigquery_client + mock_bigquery_client.get_dataset.side_effect = google.api_core.exceptions.NotFound( + "nope" + ) + assert not dataset_connector.exists("not_exists") + + +def test_dataset_exists_true(mock_bigquery_client): + dataset_connector = gbq._Dataset("my-project") + dataset_connector.client = mock_bigquery_client + mock_bigquery_client.get_dataset.return_value = object() + assert dataset_connector.exists("yes_exists") + + +def test_dataset_exists_translates_exception(mock_bigquery_client): + dataset_connector = gbq._Dataset("my-project") + dataset_connector.client = mock_bigquery_client + mock_bigquery_client.get_dataset.side_effect = google.api_core.exceptions.InternalServerError( + "something went wrong" + ) + with pytest.raises(gbq.GenericGBQException): + dataset_connector.exists("not_gonna_work") + + def test_read_gbq_with_no_project_id_given_should_fail(monkeypatch): import pydata_google_auth diff --git a/tests/unit/test_to_gbq.py b/tests/unit/test_to_gbq.py index abae1b15..8f62a99c 100644 --- a/tests/unit/test_to_gbq.py +++ b/tests/unit/test_to_gbq.py @@ -8,7 +8,6 @@ import pytest from pandas_gbq import gbq -from pandas_gbq.features import FEATURES def mock_get_credentials_no_project(*args, **kwargs): @@ -18,166 +17,6 @@ def mock_get_credentials_no_project(*args, **kwargs): return mock_credentials, None -def test_to_gbq_should_fail_if_invalid_table_name_passed(): - with pytest.raises(gbq.NotFoundException): - gbq.to_gbq(DataFrame([[1]]), "invalid_table_name", project_id="1234") - - -def test_to_gbq_with_no_project_id_given_should_fail(monkeypatch): - import pydata_google_auth - - monkeypatch.setattr(pydata_google_auth, "default", mock_get_credentials_no_project) - - with pytest.raises(ValueError, match="Could not determine project ID"): - gbq.to_gbq(DataFrame([[1]]), "dataset.tablename") - - -@pytest.mark.parametrize( - ["api_method", "warning_message", "warning_type"], - [ - ("load_parquet", "chunksize is ignored", DeprecationWarning), - ("load_csv", "chunksize will be ignored", PendingDeprecationWarning), - ], -) -def test_to_gbq_with_chunksize_warns_deprecation( - api_method, warning_message, warning_type -): - with pytest.warns(warning_type, match=warning_message): - try: - gbq.to_gbq( - 
DataFrame([[1]]), - "dataset.tablename", - project_id="my-project", - api_method=api_method, - chunksize=100, - ) - except gbq.TableCreationError: - pass - - -@pytest.mark.parametrize(["verbose"], [(True,), (False,)]) -def test_to_gbq_with_verbose_new_pandas_warns_deprecation(monkeypatch, verbose): - monkeypatch.setattr( - type(FEATURES), - "pandas_has_deprecated_verbose", - mock.PropertyMock(return_value=True), - ) - with pytest.warns(FutureWarning, match="verbose is deprecated"): - try: - gbq.to_gbq( - DataFrame([[1]]), - "dataset.tablename", - project_id="my-project", - verbose=verbose, - ) - except gbq.TableCreationError: - pass - - -def test_to_gbq_wo_verbose_w_new_pandas_no_warnings(monkeypatch, recwarn): - monkeypatch.setattr( - type(FEATURES), - "pandas_has_deprecated_verbose", - mock.PropertyMock(return_value=True), - ) - try: - gbq.to_gbq(DataFrame([[1]]), "dataset.tablename", project_id="my-project") - except gbq.TableCreationError: - pass - assert len(recwarn) == 0 - - -def test_to_gbq_with_verbose_old_pandas_no_warnings(monkeypatch, recwarn): - monkeypatch.setattr( - type(FEATURES), - "pandas_has_deprecated_verbose", - mock.PropertyMock(return_value=False), - ) - try: - gbq.to_gbq( - DataFrame([[1]]), - "dataset.tablename", - project_id="my-project", - verbose=True, - ) - except gbq.TableCreationError: - pass - assert len(recwarn) == 0 - - -def test_to_gbq_with_private_key_raises_notimplementederror(): - with pytest.raises(NotImplementedError, match="private_key"): - gbq.to_gbq( - DataFrame([[1]]), - "dataset.tablename", - project_id="my-project", - private_key="path/to/key.json", - ) - - -def test_to_gbq_doesnt_run_query(mock_bigquery_client): - try: - gbq.to_gbq(DataFrame([[1]]), "dataset.tablename", project_id="my-project") - except gbq.TableCreationError: - pass - - mock_bigquery_client.query.assert_not_called() - - -def test_to_gbq_w_empty_df(mock_bigquery_client): - import google.api_core.exceptions - - mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( - "my_table" - ) - gbq.to_gbq(DataFrame(), "my_dataset.my_table", project_id="1234") - mock_bigquery_client.create_table.assert_called_with(mock.ANY) - mock_bigquery_client.load_table_from_dataframe.assert_not_called() - mock_bigquery_client.load_table_from_file.assert_not_called() - - -def test_to_gbq_w_default_project(mock_bigquery_client): - """If no project is specified, we should be able to use project from - default credentials. - """ - import google.api_core.exceptions - from google.cloud.bigquery.table import TableReference - - mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( - "my_table" - ) - gbq.to_gbq(DataFrame(), "my_dataset.my_table") - - mock_bigquery_client.get_table.assert_called_with( - TableReference.from_string("default-project.my_dataset.my_table") - ) - mock_bigquery_client.create_table.assert_called_with(mock.ANY) - table = mock_bigquery_client.create_table.call_args[0][0] - assert table.project == "default-project" - - -def test_to_gbq_w_project_table(mock_bigquery_client): - """If a project is included in the table ID, use that instead of the client - project. 
See: https://github.com/pydata/pandas-gbq/issues/321 - """ - import google.api_core.exceptions - from google.cloud.bigquery.table import TableReference - - mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( - "my_table" - ) - gbq.to_gbq( - DataFrame(), "project_table.my_dataset.my_table", project_id="project_client", - ) - - mock_bigquery_client.get_table.assert_called_with( - TableReference.from_string("project_table.my_dataset.my_table") - ) - mock_bigquery_client.create_table.assert_called_with(mock.ANY) - table = mock_bigquery_client.create_table.call_args[0][0] - assert table.project == "project_table" - - def test_to_gbq_create_dataset_with_location(mock_bigquery_client): import google.api_core.exceptions From 83c451376f41ca19b03ed3ea8d034d7b566f5bb3 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 23 Dec 2021 15:56:31 -0600 Subject: [PATCH 27/37] add to_gbq tests --- pandas_gbq/gbq.py | 22 +++++-- tests/unit/conftest.py | 29 -------- tests/unit/test_context.py | 16 +++++ tests/unit/test_gbq.py | 132 +++++++++++++++++++++++++++++++++---- tests/unit/test_to_gbq.py | 69 +++++++++++++++---- 5 files changed, 209 insertions(+), 59 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 5dcc3fd0..5c2f54dd 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -121,7 +121,20 @@ class InvalidSchema(ValueError): table in BigQuery. """ - pass + def __init__( + self, message: str, local_schema: Dict[str, Any], remote_schema: Dict[str, Any] + ): + super().__init__(message) + self._local_schema = local_schema + self._remote_schema = remote_schema + + @property + def local_schema(self) -> Dict[str, Any]: + return self._local_schema + + @property + def remote_schema(self) -> Dict[str, Any]: + return self._remote_schema class NotFoundException(ValueError): @@ -1127,7 +1140,9 @@ def to_gbq( raise InvalidSchema( "Please verify that the structure and " "data types in the DataFrame match the " - "schema of the destination table." + "schema of the destination table.", + table_schema, + original_schema, ) # Update the local `table_schema` so mode (NULLABLE/REQUIRED) @@ -1283,9 +1298,6 @@ def delete(self, table_id): """ from google.api_core.exceptions import NotFound - if not self.exists(table_id): - raise NotFoundException("Table does not exist") - table_ref = self._table_ref(table_id) try: self.client.delete_table(table_ref) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 650f851c..50a9ac1a 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -50,41 +50,12 @@ def reset_context(): @pytest.fixture(autouse=True) def mock_bigquery_client(monkeypatch): import google.cloud.bigquery - import google.cloud.bigquery.table mock_client = mock.create_autospec(google.cloud.bigquery.Client) # Constructor returns the mock itself, so this mock can be treated as the # constructor or the instance. mock_client.return_value = mock_client - - mock_query = mock.create_autospec(google.cloud.bigquery.QueryJob) - mock_query.job_id = "some-random-id" - mock_query.state = "DONE" - mock_rows = mock.create_autospec(google.cloud.bigquery.table.RowIterator) - mock_rows.total_rows = 1 - - mock_rows.__iter__.return_value = [(1,)] - mock_query.result.return_value = mock_rows - mock_client.list_rows.return_value = mock_rows - mock_client.query.return_value = mock_query - # Mock table creation. monkeypatch.setattr(google.cloud.bigquery, "Client", mock_client) mock_client.reset_mock() - # Mock out SELECT 1 query results. 
- def generate_schema(): - query = mock_client.query.call_args[0][0] if mock_client.query.call_args else "" - if query == "SELECT 1 AS int_col": - return [google.cloud.bigquery.SchemaField("int_col", "INTEGER")] - else: - return [google.cloud.bigquery.SchemaField("_f0", "INTEGER")] - - type(mock_rows).schema = mock.PropertyMock(side_effect=generate_schema) - - # Mock out get_table. - def get_table(table_ref_or_id, **kwargs): - return google.cloud.bigquery.Table(table_ref_or_id) - - mock_client.get_table.side_effect = get_table - return mock_client diff --git a/tests/unit/test_context.py b/tests/unit/test_context.py index c0521745..1cf420f0 100644 --- a/tests/unit/test_context.py +++ b/tests/unit/test_context.py @@ -6,9 +6,25 @@ from unittest import mock +import google.cloud.bigquery +import google.cloud.bigquery.table import pytest +@pytest.fixture(autouse=True) +def default_bigquery_client(mock_bigquery_client): + mock_query = mock.create_autospec(google.cloud.bigquery.QueryJob) + mock_query.job_id = "some-random-id" + mock_query.state = "DONE" + mock_rows = mock.create_autospec(google.cloud.bigquery.table.RowIterator) + mock_rows.total_rows = 1 + mock_rows.__iter__.return_value = [(1,)] + mock_query.result.return_value = mock_rows + mock_bigquery_client.list_rows.return_value = mock_rows + mock_bigquery_client.query.return_value = mock_query + return mock_bigquery_client + + @pytest.fixture(autouse=True) def mock_get_credentials(monkeypatch): from pandas_gbq import auth diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 8f5da082..b0deb6ba 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -32,6 +32,43 @@ def mock_get_credentials_no_project(*args, **kwargs): return mock_credentials, None +@pytest.fixture(autouse=True) +def default_bigquery_client(mock_bigquery_client): + + mock_query = mock.create_autospec(google.cloud.bigquery.QueryJob) + mock_query.job_id = "some-random-id" + mock_query.state = "DONE" + mock_rows = mock.create_autospec(google.cloud.bigquery.table.RowIterator) + mock_rows.total_rows = 1 + + mock_rows.__iter__.return_value = [(1,)] + mock_query.result.return_value = mock_rows + mock_bigquery_client.list_rows.return_value = mock_rows + mock_bigquery_client.query.return_value = mock_query + + # Mock out SELECT 1 query results. + def generate_schema(): + query = ( + mock_bigquery_client.query.call_args[0][0] + if mock_bigquery_client.query.call_args + else "" + ) + if query == "SELECT 1 AS int_col": + return [google.cloud.bigquery.SchemaField("int_col", "INTEGER")] + else: + return [google.cloud.bigquery.SchemaField("_f0", "INTEGER")] + + type(mock_rows).schema = mock.PropertyMock(side_effect=generate_schema) + + # Mock out get_table. 
+ def get_table(table_ref_or_id, **kwargs): + return google.cloud.bigquery.Table(table_ref_or_id) + + mock_bigquery_client.get_table.side_effect = get_table + + return mock_bigquery_client + + @pytest.mark.parametrize( ("type_", "expected"), [ @@ -274,37 +311,108 @@ def test_to_gbq_create_dataset(mock_bigquery_client): def test_dataset_create_already_exists_translates_exception(mock_bigquery_client): - dataset_connector = gbq._Dataset("my-project") - dataset_connector.client = mock_bigquery_client + connector = gbq._Dataset("my-project") + connector.client = mock_bigquery_client mock_bigquery_client.get_dataset.return_value = object() with pytest.raises(gbq.DatasetCreationError): - dataset_connector.create("already_exists") + connector.create("already_exists") def test_dataset_exists_false(mock_bigquery_client): - dataset_connector = gbq._Dataset("my-project") - dataset_connector.client = mock_bigquery_client + connector = gbq._Dataset("my-project") + connector.client = mock_bigquery_client mock_bigquery_client.get_dataset.side_effect = google.api_core.exceptions.NotFound( "nope" ) - assert not dataset_connector.exists("not_exists") + assert not connector.exists("not_exists") def test_dataset_exists_true(mock_bigquery_client): - dataset_connector = gbq._Dataset("my-project") - dataset_connector.client = mock_bigquery_client + connector = gbq._Dataset("my-project") + connector.client = mock_bigquery_client mock_bigquery_client.get_dataset.return_value = object() - assert dataset_connector.exists("yes_exists") + assert connector.exists("yes_exists") def test_dataset_exists_translates_exception(mock_bigquery_client): - dataset_connector = gbq._Dataset("my-project") - dataset_connector.client = mock_bigquery_client + connector = gbq._Dataset("my-project") + connector.client = mock_bigquery_client mock_bigquery_client.get_dataset.side_effect = google.api_core.exceptions.InternalServerError( "something went wrong" ) with pytest.raises(gbq.GenericGBQException): - dataset_connector.exists("not_gonna_work") + connector.exists("not_gonna_work") + + +def test_table_create_already_exists(mock_bigquery_client): + connector = gbq._Table("my-project", "my_dataset") + connector.client = mock_bigquery_client + mock_bigquery_client.get_table.return_value = object() + with pytest.raises(gbq.TableCreationError): + connector.create( + "already_exists", {"fields": [{"name": "f", "type": "STRING"}]} + ) + + +def test_table_create_translates_exception(mock_bigquery_client): + connector = gbq._Table("my-project", "my_dataset") + connector.client = mock_bigquery_client + mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( + "nope" + ) + mock_bigquery_client.create_table.side_effect = google.api_core.exceptions.InternalServerError( + "something went wrong" + ) + with pytest.raises(gbq.GenericGBQException): + connector.create( + "not_gonna_work", {"fields": [{"name": "f", "type": "STRING"}]} + ) + + +def test_table_delete_notfound_ok(mock_bigquery_client): + connector = gbq._Table("my-project", "my_dataset") + connector.client = mock_bigquery_client + mock_bigquery_client.delete_table.side_effect = google.api_core.exceptions.NotFound( + "nope" + ) + connector.delete("not_exists") + mock_bigquery_client.delete_table.assert_called_once() + + +def test_table_delete_translates_exception(mock_bigquery_client): + connector = gbq._Table("my-project", "my_dataset") + connector.client = mock_bigquery_client + mock_bigquery_client.delete_table.side_effect = 
google.api_core.exceptions.InternalServerError( + "something went wrong" + ) + with pytest.raises(gbq.GenericGBQException): + connector.delete("not_gonna_work") + + +def test_table_exists_false(mock_bigquery_client): + connector = gbq._Table("my-project", "my_dataset") + connector.client = mock_bigquery_client + mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( + "nope" + ) + assert not connector.exists("not_exists") + + +def test_table_exists_true(mock_bigquery_client): + connector = gbq._Table("my-project", "my_dataset") + connector.client = mock_bigquery_client + mock_bigquery_client.get_table.return_value = object() + assert connector.exists("yes_exists") + + +def test_table_exists_translates_exception(mock_bigquery_client): + connector = gbq._Table("my-project", "my_dataset") + connector.client = mock_bigquery_client + mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.InternalServerError( + "something went wrong" + ) + with pytest.raises(gbq.GenericGBQException): + connector.exists("not_gonna_work") def test_read_gbq_with_no_project_id_given_should_fail(monkeypatch): diff --git a/tests/unit/test_to_gbq.py b/tests/unit/test_to_gbq.py index 8f62a99c..af8b6a2e 100644 --- a/tests/unit/test_to_gbq.py +++ b/tests/unit/test_to_gbq.py @@ -2,24 +2,15 @@ # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. -from unittest import mock - +import google.cloud.bigquery +import google.api_core.exceptions from pandas import DataFrame import pytest from pandas_gbq import gbq -def mock_get_credentials_no_project(*args, **kwargs): - import google.auth.credentials - - mock_credentials = mock.create_autospec(google.auth.credentials.Credentials) - return mock_credentials, None - - def test_to_gbq_create_dataset_with_location(mock_bigquery_client): - import google.api_core.exceptions - mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( "my_table" ) @@ -35,9 +26,61 @@ def test_to_gbq_create_dataset_with_location(mock_bigquery_client): assert sent_dataset.location == "us-west1" -def test_to_gbq_create_dataset_translates_exception(mock_bigquery_client): - import google.api_core.exceptions +def test_to_gbq_create_dataset_with_if_exists_append(mock_bigquery_client): + from google.cloud.bigquery import SchemaField + + mock_bigquery_client.get_table.return_value = google.cloud.bigquery.Table( + "myproj.my_dataset.my_table", + schema=( + SchemaField("col_a", "FLOAT", mode="REQUIRED"), + SchemaField("col_b", "STRING", mode="REQUIRED"), + ), + ) + gbq.to_gbq( + DataFrame({"col_a": [0.25, 1.5, -1.0], "col_b": ["a", "b", "c"]}), + "my_dataset.my_table", + project_id="myproj", + if_exists="append", + ) + mock_bigquery_client.load_table_from_dataframe.assert_called_once() + +def test_to_gbq_create_dataset_with_if_exists_append_mismatch(mock_bigquery_client): + from google.cloud.bigquery import SchemaField + + mock_bigquery_client.get_table.return_value = google.cloud.bigquery.Table( + "myproj.my_dataset.my_table", + schema=(SchemaField("col_a", "INTEGER"), SchemaField("col_b", "STRING")), + ) + with pytest.raises(gbq.InvalidSchema): + gbq.to_gbq( + DataFrame({"col_a": [0.25, 1.5, -1.0]}), + "my_dataset.my_table", + project_id="myproj", + if_exists="append", + ) + + +def test_to_gbq_create_dataset_with_if_exists_replace(mock_bigquery_client): + mock_bigquery_client.get_table.side_effect = ( + # Initial check + google.cloud.bigquery.Table("myproj.my_dataset.my_table"), + # Recreate check + 
google.api_core.exceptions.NotFound("my_table"), + ) + gbq.to_gbq( + DataFrame([[1]]), + "my_dataset.my_table", + project_id="myproj", + if_exists="replace", + ) + # TODO: We can avoid these API calls by using write disposition in the load + # job. See: https://github.com/googleapis/python-bigquery-pandas/issues/118 + assert mock_bigquery_client.delete_table.called + assert mock_bigquery_client.create_table.called + + +def test_to_gbq_create_dataset_translates_exception(mock_bigquery_client): mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( "my_table" ) From 4f12c786b73364430c78d85f776de8989f395c8a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 23 Dec 2021 16:13:27 -0600 Subject: [PATCH 28/37] boost coverage --- noxfile.py | 2 +- owlbot.py | 2 +- pandas_gbq/gbq.py | 2 +- tests/unit/test_gbq.py | 1 - tests/unit/test_to_gbq.py | 14 ++++++++++++-- 5 files changed, 15 insertions(+), 6 deletions(-) diff --git a/noxfile.py b/noxfile.py index 43825e5e..5e41983b 100644 --- a/noxfile.py +++ b/noxfile.py @@ -259,7 +259,7 @@ def cover(session): test runs (not system test runs), and then erases coverage data. """ session.install("coverage", "pytest-cov") - session.run("coverage", "report", "--show-missing", "--fail-under=90") + session.run("coverage", "report", "--show-missing", "--fail-under=94") session.run("coverage", "erase") diff --git a/owlbot.py b/owlbot.py index 3c31a67f..62c9f3c4 100644 --- a/owlbot.py +++ b/owlbot.py @@ -33,7 +33,7 @@ templated_files = common.py_library( unit_test_python_versions=["3.7", "3.8", "3.9", "3.10"], system_test_python_versions=["3.7", "3.8", "3.9", "3.10"], - cov_level=90, + cov_level=94, unit_test_extras=extras, system_test_extras=extras, intersphinx_dependencies={ diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 5c2f54dd..68544d89 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -1135,7 +1135,7 @@ def to_gbq( ) elif if_exists == "replace": connector.delete_and_recreate_table(dataset_id, table_id, table_schema) - elif if_exists == "append": + else: if not pandas_gbq.schema.schema_is_subset(original_schema, table_schema): raise InvalidSchema( "Please verify that the structure and " diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index b0deb6ba..8740f618 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -34,7 +34,6 @@ def mock_get_credentials_no_project(*args, **kwargs): @pytest.fixture(autouse=True) def default_bigquery_client(mock_bigquery_client): - mock_query = mock.create_autospec(google.cloud.bigquery.QueryJob) mock_query.job_id = "some-random-id" mock_query.state = "DONE" diff --git a/tests/unit/test_to_gbq.py b/tests/unit/test_to_gbq.py index af8b6a2e..a6c49c37 100644 --- a/tests/unit/test_to_gbq.py +++ b/tests/unit/test_to_gbq.py @@ -8,6 +8,14 @@ import pytest from pandas_gbq import gbq +from pandas_gbq.features import FEATURES + + +@pytest.fixture +def expected_load_method(mock_bigquery_client): + if FEATURES.pandas_has_parquet_with_lossless_timestamp: + return mock_bigquery_client.load_table_from_dataframe + return mock_bigquery_client.load_table_from_file def test_to_gbq_create_dataset_with_location(mock_bigquery_client): @@ -26,7 +34,9 @@ def test_to_gbq_create_dataset_with_location(mock_bigquery_client): assert sent_dataset.location == "us-west1" -def test_to_gbq_create_dataset_with_if_exists_append(mock_bigquery_client): +def test_to_gbq_create_dataset_with_if_exists_append( + mock_bigquery_client, expected_load_method +): from google.cloud.bigquery import 
SchemaField mock_bigquery_client.get_table.return_value = google.cloud.bigquery.Table( @@ -42,7 +52,7 @@ def test_to_gbq_create_dataset_with_if_exists_append(mock_bigquery_client): project_id="myproj", if_exists="append", ) - mock_bigquery_client.load_table_from_dataframe.assert_called_once() + expected_load_method.assert_called_once() def test_to_gbq_create_dataset_with_if_exists_append_mismatch(mock_bigquery_client): From 4fedaaf45d6cf8948899e96959128a2d42a35672 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Thu, 23 Dec 2021 22:15:28 +0000 Subject: [PATCH 29/37] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- .coveragerc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.coveragerc b/.coveragerc index 935f2a53..0a3b1cea 100644 --- a/.coveragerc +++ b/.coveragerc @@ -22,7 +22,7 @@ omit = google/cloud/__init__.py [report] -fail_under = 90 +fail_under = 94 show_missing = True exclude_lines = # Re-enable the standard pragma From 2bfd5a197ab10c01de0fcdd6a3920bc106e36b25 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 23 Dec 2021 16:16:34 -0600 Subject: [PATCH 30/37] cover new properties --- tests/unit/test_to_gbq.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_to_gbq.py b/tests/unit/test_to_gbq.py index a6c49c37..98b0c97f 100644 --- a/tests/unit/test_to_gbq.py +++ b/tests/unit/test_to_gbq.py @@ -62,7 +62,7 @@ def test_to_gbq_create_dataset_with_if_exists_append_mismatch(mock_bigquery_clie "myproj.my_dataset.my_table", schema=(SchemaField("col_a", "INTEGER"), SchemaField("col_b", "STRING")), ) - with pytest.raises(gbq.InvalidSchema): + with pytest.raises(gbq.InvalidSchema) as exception_block: gbq.to_gbq( DataFrame({"col_a": [0.25, 1.5, -1.0]}), "my_dataset.my_table", @@ -70,6 +70,15 @@ def test_to_gbq_create_dataset_with_if_exists_append_mismatch(mock_bigquery_clie if_exists="append", ) + exc = exception_block.value + assert exc.remote_schema == { + "fields": [ + {"name": "col_a", "type": "INTEGER", "mode": "NULLABLE"}, + {"name": "col_b", "type": "STRING", "mode": "NULLABLE"}, + ] + } + assert exc.local_schema == {"fields": [{"name": "col_a", "type": "FLOAT"}]} + def test_to_gbq_create_dataset_with_if_exists_replace(mock_bigquery_client): mock_bigquery_client.get_table.side_effect = ( From 3cb788ec9f90fc51d5eb5ab1a316204626991cdf Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 23 Dec 2021 16:22:30 -0600 Subject: [PATCH 31/37] unknown if_exists --- tests/unit/test_to_gbq.py | 44 +++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/tests/unit/test_to_gbq.py b/tests/unit/test_to_gbq.py index 98b0c97f..e488bdb5 100644 --- a/tests/unit/test_to_gbq.py +++ b/tests/unit/test_to_gbq.py @@ -34,9 +34,22 @@ def test_to_gbq_create_dataset_with_location(mock_bigquery_client): assert sent_dataset.location == "us-west1" -def test_to_gbq_create_dataset_with_if_exists_append( - mock_bigquery_client, expected_load_method -): +def test_to_gbq_create_dataset_translates_exception(mock_bigquery_client): + mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( + "my_table" + ) + mock_bigquery_client.get_dataset.side_effect = google.api_core.exceptions.NotFound( + "my_dataset" + ) + mock_bigquery_client.create_dataset.side_effect = google.api_core.exceptions.InternalServerError( + 
"something went wrong" + ) + + with pytest.raises(gbq.GenericGBQException): + gbq.to_gbq(DataFrame([[1]]), "my_dataset.my_table", project_id="1234") + + +def test_to_gbq_with_if_exists_append(mock_bigquery_client, expected_load_method): from google.cloud.bigquery import SchemaField mock_bigquery_client.get_table.return_value = google.cloud.bigquery.Table( @@ -55,7 +68,7 @@ def test_to_gbq_create_dataset_with_if_exists_append( expected_load_method.assert_called_once() -def test_to_gbq_create_dataset_with_if_exists_append_mismatch(mock_bigquery_client): +def test_to_gbq_with_if_exists_append_mismatch(mock_bigquery_client): from google.cloud.bigquery import SchemaField mock_bigquery_client.get_table.return_value = google.cloud.bigquery.Table( @@ -80,7 +93,7 @@ def test_to_gbq_create_dataset_with_if_exists_append_mismatch(mock_bigquery_clie assert exc.local_schema == {"fields": [{"name": "col_a", "type": "FLOAT"}]} -def test_to_gbq_create_dataset_with_if_exists_replace(mock_bigquery_client): +def test_to_gbq_with_if_exists_replace(mock_bigquery_client): mock_bigquery_client.get_table.side_effect = ( # Initial check google.cloud.bigquery.Table("myproj.my_dataset.my_table"), @@ -99,16 +112,11 @@ def test_to_gbq_create_dataset_with_if_exists_replace(mock_bigquery_client): assert mock_bigquery_client.create_table.called -def test_to_gbq_create_dataset_translates_exception(mock_bigquery_client): - mock_bigquery_client.get_table.side_effect = google.api_core.exceptions.NotFound( - "my_table" - ) - mock_bigquery_client.get_dataset.side_effect = google.api_core.exceptions.NotFound( - "my_dataset" - ) - mock_bigquery_client.create_dataset.side_effect = google.api_core.exceptions.InternalServerError( - "something went wrong" - ) - - with pytest.raises(gbq.GenericGBQException): - gbq.to_gbq(DataFrame([[1]]), "my_dataset.my_table", project_id="1234") +def test_to_gbq_with_if_exists_unknown(): + with pytest.raises(ValueError): + gbq.to_gbq( + DataFrame([[1]]), + "my_dataset.my_table", + project_id="myproj", + if_exists="unknown", + ) From 95f04781e10468dd6c4b3ad04ff6726f3b29525d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 23 Dec 2021 17:10:04 -0600 Subject: [PATCH 32/37] add session for no deps --- .coveragerc | 2 +- noxfile.py | 17 ++++++++++++++--- owlbot.py | 2 +- pandas_gbq/gbq.py | 17 +++++------------ setup.py | 5 +++-- testing/constraints-3.7.txt | 1 + 6 files changed, 25 insertions(+), 19 deletions(-) diff --git a/.coveragerc b/.coveragerc index 0a3b1cea..cc06722c 100644 --- a/.coveragerc +++ b/.coveragerc @@ -22,7 +22,7 @@ omit = google/cloud/__init__.py [report] -fail_under = 94 +fail_under = 95 show_missing = True exclude_lines = # Re-enable the standard pragma diff --git a/noxfile.py b/noxfile.py index 5e41983b..887b6fd6 100644 --- a/noxfile.py +++ b/noxfile.py @@ -36,6 +36,7 @@ # 'docfx' is excluded since it only needs to run in 'docs-presubmit' nox.options.sessions = [ + "unit_noextras", "unit", "system", "cover", @@ -79,7 +80,7 @@ def lint_setup_py(session): session.run("python", "setup.py", "check", "--restructuredtext", "--strict") -def default(session): +def default(session, install_extras=True): # Install all test dependencies, then install this package in-place. constraints_path = str( @@ -95,7 +96,11 @@ def default(session): constraints_path, ) - session.install("-e", ".[tqdm]", "-c", constraints_path) + if install_extras: + install_target = ".[tqdm]" + else: + install_target = "." 
+ session.install("-e", install_target, "-c", constraints_path) # Run py.test against the unit tests. session.run( @@ -119,6 +124,12 @@ def unit(session): default(session) +@nox.session(python=[UNIT_TEST_PYTHON_VERSIONS[0], UNIT_TEST_PYTHON_VERSIONS[-1]]) +def unit_noextras(session): + """Run the unit test suite.""" + default(session, install_extras=False) + + @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def system(session): """Run the system test suite.""" @@ -259,7 +270,7 @@ def cover(session): test runs (not system test runs), and then erases coverage data. """ session.install("coverage", "pytest-cov") - session.run("coverage", "report", "--show-missing", "--fail-under=94") + session.run("coverage", "report", "--show-missing", "--fail-under=95") session.run("coverage", "erase") diff --git a/owlbot.py b/owlbot.py index 62c9f3c4..46b80d58 100644 --- a/owlbot.py +++ b/owlbot.py @@ -33,7 +33,7 @@ templated_files = common.py_library( unit_test_python_versions=["3.7", "3.8", "3.9", "3.10"], system_test_python_versions=["3.7", "3.8", "3.9", "3.10"], - cov_level=94, + cov_level=95, unit_test_extras=extras, system_test_extras=extras, intersphinx_dependencies={ diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 68544d89..0a18cc3a 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -367,19 +367,12 @@ def sizeof_fmt(num, suffix="B"): return fmt % (num, "Y", suffix) def get_client(self): + import google.api_core.client_info import pandas - try: - # This module was added in google-api-core 1.11.0. - # We don't have a hard requirement on that version, so only - # populate the client_info if available. - import google.api_core.client_info - - client_info = google.api_core.client_info.ClientInfo( - user_agent="pandas-{}".format(pandas.__version__) - ) - except ImportError: - client_info = None + client_info = google.api_core.client_info.ClientInfo( + user_agent="pandas-{}".format(pandas.__version__) + ) # In addition to new enough version of google-api-core, a new enough # version of google-cloud-bigquery is required to populate the @@ -1070,7 +1063,7 @@ def to_gbq( DeprecationWarning, stacklevel=2, ) - elif api_method == "load_csv": + else: warnings.warn( "chunksize will be ignored when using api_method='load_csv' in a future version of pandas-gbq", PendingDeprecationWarning, diff --git a/setup.py b/setup.py index 4be5a722..2e596cc6 100644 --- a/setup.py +++ b/setup.py @@ -28,8 +28,9 @@ "pandas >=0.24.2", "pyarrow >=3.0.0, <7.0dev", "pydata-google-auth", - "google-auth", - "google-auth-oauthlib", + "google-api-core >=1.14.0", + "google-auth >=1.4.1", + "google-auth-oauthlib >=0.0.1", # 2.4.* has a bug where waiting for the query can hang indefinitely. 
# https://github.com/pydata/pandas-gbq/issues/343 "google-cloud-bigquery[bqstorage,pandas] >=1.11.1,<4.0.0dev,!=2.4.*", diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 6c3080dc..2a500f35 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -6,6 +6,7 @@ # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 db-dtypes==0.3.1 +google-api-core==1.14.0 google-auth==1.4.1 google-auth-oauthlib==0.0.1 google-cloud-bigquery==1.11.1 From 0f9baa8fabcd9a7811a431156c1eeaa9d5329079 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Thu, 23 Dec 2021 23:11:58 +0000 Subject: [PATCH 33/37] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- noxfile.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/noxfile.py b/noxfile.py index 887b6fd6..ffa7930d 100644 --- a/noxfile.py +++ b/noxfile.py @@ -36,7 +36,6 @@ # 'docfx' is excluded since it only needs to run in 'docs-presubmit' nox.options.sessions = [ - "unit_noextras", "unit", "system", "cover", @@ -80,7 +79,7 @@ def lint_setup_py(session): session.run("python", "setup.py", "check", "--restructuredtext", "--strict") -def default(session, install_extras=True): +def default(session): # Install all test dependencies, then install this package in-place. constraints_path = str( @@ -96,11 +95,7 @@ def default(session, install_extras=True): constraints_path, ) - if install_extras: - install_target = ".[tqdm]" - else: - install_target = "." - session.install("-e", install_target, "-c", constraints_path) + session.install("-e", ".[tqdm]", "-c", constraints_path) # Run py.test against the unit tests. 
session.run( @@ -124,12 +119,6 @@ def unit(session): default(session) -@nox.session(python=[UNIT_TEST_PYTHON_VERSIONS[0], UNIT_TEST_PYTHON_VERSIONS[-1]]) -def unit_noextras(session): - """Run the unit test suite.""" - default(session, install_extras=False) - - @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def system(session): """Run the system test suite.""" From 7ac9b6a434eec9483ba73e8d857c6797df79553e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 28 Dec 2021 10:39:35 -0600 Subject: [PATCH 34/37] remove system test for private delete method --- tests/system/test_gbq.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py index 812d2089..67735c53 100644 --- a/tests/system/test_gbq.py +++ b/tests/system/test_gbq.py @@ -1522,11 +1522,6 @@ def test_delete_table(gbq_table): assert not gbq_table.exists("test_delete_table") -def test_delete_table_not_found(gbq_table): - with pytest.raises(gbq.NotFoundException): - gbq_table.delete("test_delete_table_not_found") - - def test_create_table_data_dataset_does_not_exist( project, credentials, gbq_dataset, random_dataset_id ): From d562ee9ee6885996a43baf4ab6cfae58df0058c3 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 28 Dec 2021 10:42:18 -0600 Subject: [PATCH 35/37] check number of columns --- tests/unit/test_load.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/test_load.py b/tests/unit/test_load.py index b35c735c..24f262e6 100644 --- a/tests/unit/test_load.py +++ b/tests/unit/test_load.py @@ -157,6 +157,7 @@ def test_load_csv_from_file_generates_schema(mock_bigquery_client): _, kwargs = mock_load.call_args assert "job_config" in kwargs sent_schema = kwargs["job_config"].schema + assert len(sent_schema) == len(df.columns) assert sent_schema[0].name == "int_col" assert sent_schema[0].field_type == "INTEGER" assert sent_schema[1].name == "bool_col" From 55999155c273c5c3787299feb2f85771daec14c6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 28 Dec 2021 11:32:15 -0600 Subject: [PATCH 36/37] coverage dropped due to removed code --- owlbot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/owlbot.py b/owlbot.py index 46b80d58..62c9f3c4 100644 --- a/owlbot.py +++ b/owlbot.py @@ -33,7 +33,7 @@ templated_files = common.py_library( unit_test_python_versions=["3.7", "3.8", "3.9", "3.10"], system_test_python_versions=["3.7", "3.8", "3.9", "3.10"], - cov_level=95, + cov_level=94, unit_test_extras=extras, system_test_extras=extras, intersphinx_dependencies={ From 5223fa4b363bda0af04620b2db4461f34b146ac3 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Tue, 28 Dec 2021 17:34:14 +0000 Subject: [PATCH 37/37] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- .coveragerc | 2 +- noxfile.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.coveragerc b/.coveragerc index cc06722c..0a3b1cea 100644 --- a/.coveragerc +++ b/.coveragerc @@ -22,7 +22,7 @@ omit = google/cloud/__init__.py [report] -fail_under = 95 +fail_under = 94 show_missing = True exclude_lines = # Re-enable the standard pragma diff --git a/noxfile.py b/noxfile.py index ffa7930d..5e41983b 100644 --- a/noxfile.py +++ b/noxfile.py @@ -259,7 +259,7 @@ def cover(session): test runs (not system test runs), and then erases coverage data. 
""" session.install("coverage", "pytest-cov") - session.run("coverage", "report", "--show-missing", "--fail-under=95") + session.run("coverage", "report", "--show-missing", "--fail-under=94") session.run("coverage", "erase")