Skip to content

Commit

Permalink
test: improve to_gbq logic unit test coverage (#449)
Browse files Browse the repository at this point in the history
* 🦉 Updates from OwlBot

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
  • Loading branch information
tswast and gcf-owl-bot[bot] authored Dec 28, 2021
1 parent bf0e863 commit 3ae5d4c
Show file tree
Hide file tree
Showing 18 changed files with 542 additions and 98 deletions.
2 changes: 1 addition & 1 deletion .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ omit =
google/cloud/__init__.py

[report]
fail_under = 89
fail_under = 94
show_missing = True
exclude_lines =
# Re-enable the standard pragma
Expand Down
2 changes: 1 addition & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def cover(session):
test runs (not system test runs), and then erases coverage data.
"""
session.install("coverage", "pytest-cov")
session.run("coverage", "report", "--show-missing", "--fail-under=89")
session.run("coverage", "report", "--show-missing", "--fail-under=94")

session.run("coverage", "erase")

Expand Down
2 changes: 1 addition & 1 deletion owlbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
templated_files = common.py_library(
unit_test_python_versions=["3.7", "3.8", "3.9", "3.10"],
system_test_python_versions=["3.7", "3.8", "3.9", "3.10"],
cov_level=89,
cov_level=94,
unit_test_extras=extras,
system_test_extras=extras,
intersphinx_dependencies={
Expand Down
41 changes: 23 additions & 18 deletions pandas_gbq/gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,20 @@ class InvalidSchema(ValueError):
table in BigQuery.
"""

pass
def __init__(
    self, message: str, local_schema: Dict[str, Any], remote_schema: Dict[str, Any]
):
    """Record the mismatched schemas alongside the error message.

    Args:
        message: Human-readable description of the schema mismatch.
        local_schema: Schema derived from the local DataFrame.
        remote_schema: Schema of the destination BigQuery table.
    """
    super().__init__(message)
    self._local_schema = local_schema
    self._remote_schema = remote_schema

@property
def local_schema(self) -> Dict[str, Any]:
    """Schema derived from the local DataFrame, as passed at construction."""
    return self._local_schema

@property
def remote_schema(self) -> Dict[str, Any]:
    """Schema of the destination BigQuery table, as passed at construction."""
    return self._remote_schema


class NotFoundException(ValueError):
Expand Down Expand Up @@ -354,19 +367,12 @@ def sizeof_fmt(num, suffix="B"):
return fmt % (num, "Y", suffix)

def get_client(self):
import google.api_core.client_info
import pandas

try:
# This module was added in google-api-core 1.11.0.
# We don't have a hard requirement on that version, so only
# populate the client_info if available.
import google.api_core.client_info

client_info = google.api_core.client_info.ClientInfo(
user_agent="pandas-{}".format(pandas.__version__)
)
except ImportError:
client_info = None
client_info = google.api_core.client_info.ClientInfo(
user_agent="pandas-{}".format(pandas.__version__)
)

# In addition to new enough version of google-api-core, a new enough
# version of google-cloud-bigquery is required to populate the
Expand Down Expand Up @@ -1057,7 +1063,7 @@ def to_gbq(
DeprecationWarning,
stacklevel=2,
)
elif api_method == "load_csv":
else:
warnings.warn(
"chunksize will be ignored when using api_method='load_csv' in a future version of pandas-gbq",
PendingDeprecationWarning,
Expand Down Expand Up @@ -1122,12 +1128,14 @@ def to_gbq(
)
elif if_exists == "replace":
connector.delete_and_recreate_table(dataset_id, table_id, table_schema)
elif if_exists == "append":
else:
if not pandas_gbq.schema.schema_is_subset(original_schema, table_schema):
raise InvalidSchema(
"Please verify that the structure and "
"data types in the DataFrame match the "
"schema of the destination table."
"schema of the destination table.",
table_schema,
original_schema,
)

# Update the local `table_schema` so mode (NULLABLE/REQUIRED)
Expand Down Expand Up @@ -1283,9 +1291,6 @@ def delete(self, table_id):
"""
from google.api_core.exceptions import NotFound

if not self.exists(table_id):
raise NotFoundException("Table does not exist")

table_ref = self._table_ref(table_id)
try:
self.client.delete_table(table_ref)
Expand Down
7 changes: 6 additions & 1 deletion pandas_gbq/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,11 @@ def load_csv_from_file(
chunksize: Optional[int],
schema: Optional[Dict[str, Any]],
):
"""Manually encode a DataFrame to CSV and use the buffer in a load job.
This method is needed for writing with google-cloud-bigquery versions that
don't implement load_table_from_dataframe with the CSV serialization format.
"""
if schema is None:
schema = pandas_gbq.schema.generate_bq_schema(dataframe)

Expand All @@ -203,7 +208,7 @@ def load_chunk(chunk, job_config):
finally:
chunk_buffer.close()

return load_csv(dataframe, chunksize, bq_schema, load_chunk,)
return load_csv(dataframe, chunksize, bq_schema, load_chunk)


def load_chunks(
Expand Down
14 changes: 13 additions & 1 deletion pandas_gbq/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,19 @@ def to_pandas_gbq(client_schema):
"""Given a sequence of :class:`google.cloud.bigquery.schema.SchemaField`,
return a schema in pandas-gbq API format.
"""
remote_fields = [field_remote.to_api_repr() for field_remote in client_schema]
remote_fields = [
# Filter out default values. google-cloud-bigquery versions before
# 2.31.0 (https://github.com/googleapis/python-bigquery/pull/557)
# include a description key, even if not explicitly set. This has the
# potential to unset the description unintentionally in cases where
# pandas-gbq is updating the schema.
{
key: value
for key, value in field_remote.to_api_repr().items()
if value is not None
}
for field_remote in client_schema
]
for field in remote_fields:
field["type"] = field["type"].upper()
field["mode"] = field["mode"].upper()
Expand Down
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@
"pandas >=0.24.2",
"pyarrow >=3.0.0, <7.0dev",
"pydata-google-auth",
"google-auth",
"google-auth-oauthlib",
"google-api-core >=1.14.0",
"google-auth >=1.4.1",
"google-auth-oauthlib >=0.0.1",
# 2.4.* has a bug where waiting for the query can hang indefinitely.
# https://github.com/pydata/pandas-gbq/issues/343
"google-cloud-bigquery[bqstorage,pandas] >=1.11.1,<4.0.0dev,!=2.4.*",
Expand Down
1 change: 1 addition & 0 deletions testing/constraints-3.7.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
# Then this file should have foo==1.14.0
db-dtypes==0.3.1
google-api-core==1.14.0
google-auth==1.4.1
google-auth-oauthlib==0.0.1
google-cloud-bigquery==1.11.1
Expand Down
5 changes: 0 additions & 5 deletions tests/system/test_gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -1522,11 +1522,6 @@ def test_delete_table(gbq_table):
assert not gbq_table.exists("test_delete_table")


def test_delete_table_not_found(gbq_table):
with pytest.raises(gbq.NotFoundException):
gbq_table.delete("test_delete_table_not_found")


def test_create_table_data_dataset_does_not_exist(
project, credentials, gbq_dataset, random_dataset_id
):
Expand Down
59 changes: 30 additions & 29 deletions tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,36 @@
import pytest


def mock_get_credentials(*args, **kwargs):
    """Stand-in for ``pydata_google_auth.default`` that skips real auth.

    Mirrors the real function's return shape -- a ``(credentials,
    project_id)`` tuple -- using autospecced mock credentials and a
    fixed project id.
    """
    import google.auth.credentials

    fake_credentials = mock.create_autospec(google.auth.credentials.Credentials)
    return fake_credentials, "default-project"


@pytest.fixture
def mock_service_account_credentials():
    """Provide autospecced service-account credentials for tests."""
    import google.oauth2.service_account

    credentials_class = google.oauth2.service_account.Credentials
    return mock.create_autospec(credentials_class)


@pytest.fixture
def mock_compute_engine_credentials():
    """Provide autospecced Compute Engine credentials for tests."""
    import google.auth.compute_engine

    credentials_class = google.auth.compute_engine.Credentials
    return mock.create_autospec(credentials_class)


@pytest.fixture(autouse=True)
def no_auth(monkeypatch):
    """Stub out ``pydata_google_auth.default`` so no test hits real auth.

    The string form of ``monkeypatch.setattr`` imports the target module
    itself, so no explicit import is needed here.
    """
    monkeypatch.setattr("pydata_google_auth.default", mock_get_credentials)


@pytest.fixture(autouse=True, scope="function")
def reset_context():
import pandas_gbq
Expand All @@ -20,41 +50,12 @@ def reset_context():
@pytest.fixture(autouse=True)
def mock_bigquery_client(monkeypatch):
import google.cloud.bigquery
import google.cloud.bigquery.table

mock_client = mock.create_autospec(google.cloud.bigquery.Client)
# Constructor returns the mock itself, so this mock can be treated as the
# constructor or the instance.
mock_client.return_value = mock_client

mock_query = mock.create_autospec(google.cloud.bigquery.QueryJob)
mock_query.job_id = "some-random-id"
mock_query.state = "DONE"
mock_rows = mock.create_autospec(google.cloud.bigquery.table.RowIterator)
mock_rows.total_rows = 1

mock_rows.__iter__.return_value = [(1,)]
mock_query.result.return_value = mock_rows
mock_client.list_rows.return_value = mock_rows
mock_client.query.return_value = mock_query
# Mock table creation.
monkeypatch.setattr(google.cloud.bigquery, "Client", mock_client)
mock_client.reset_mock()

# Mock out SELECT 1 query results.
def generate_schema():
query = mock_client.query.call_args[0][0] if mock_client.query.call_args else ""
if query == "SELECT 1 AS int_col":
return [google.cloud.bigquery.SchemaField("int_col", "INTEGER")]
else:
return [google.cloud.bigquery.SchemaField("_f0", "INTEGER")]

type(mock_rows).schema = mock.PropertyMock(side_effect=generate_schema)

# Mock out get_table.
def get_table(table_ref_or_id, **kwargs):
return google.cloud.bigquery.Table(table_ref_or_id)

mock_client.get_table.side_effect = get_table

return mock_client
25 changes: 11 additions & 14 deletions tests/unit/test_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,35 +28,32 @@ def test_get_credentials_default_credentials(monkeypatch):
import google.auth
import google.auth.credentials
import google.cloud.bigquery
import pydata_google_auth

def mock_default_credentials(scopes=None, request=None):
return (
mock.create_autospec(google.auth.credentials.Credentials),
"default-project",
)
mock_user_credentials = mock.create_autospec(google.auth.credentials.Credentials)

def mock_default_credentials(scopes, **kwargs):
return (mock_user_credentials, "test-project")

monkeypatch.setattr(google.auth, "default", mock_default_credentials)
monkeypatch.setattr(pydata_google_auth, "default", mock_default_credentials)

credentials, project = auth.get_credentials()
assert project == "default-project"
assert project == "test-project"
assert credentials is not None


def test_get_credentials_load_user_no_default(monkeypatch):
import google.auth
import google.auth.credentials
import pydata_google_auth
import pydata_google_auth.cache

def mock_default_credentials(scopes=None, request=None):
return (None, None)

monkeypatch.setattr(google.auth, "default", mock_default_credentials)
mock_user_credentials = mock.create_autospec(google.auth.credentials.Credentials)

mock_cache = mock.create_autospec(pydata_google_auth.cache.CredentialsCache)
mock_cache.load.return_value = mock_user_credentials
def mock_default_credentials(scopes, **kwargs):
return (mock_user_credentials, None)

monkeypatch.setattr(auth, "get_credentials_cache", lambda _: mock_cache)
monkeypatch.setattr(pydata_google_auth, "default", mock_default_credentials)

credentials, project = auth.get_credentials()
assert project is None
Expand Down
16 changes: 16 additions & 0 deletions tests/unit/test_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,25 @@

from unittest import mock

import google.cloud.bigquery
import google.cloud.bigquery.table
import pytest


@pytest.fixture(autouse=True)
def default_bigquery_client(mock_bigquery_client):
    """Wire the shared mock BigQuery client with canned query results.

    Gives every query a DONE job whose result iterates a single
    one-column row, so code paths that run ``SELECT``-style probes
    succeed without touching the network.
    """
    row_iterator = mock.create_autospec(google.cloud.bigquery.table.RowIterator)
    row_iterator.total_rows = 1
    row_iterator.__iter__.return_value = [(1,)]

    query_job = mock.create_autospec(google.cloud.bigquery.QueryJob)
    query_job.job_id = "some-random-id"
    query_job.state = "DONE"
    query_job.result.return_value = row_iterator

    mock_bigquery_client.query.return_value = query_job
    mock_bigquery_client.list_rows.return_value = row_iterator
    return mock_bigquery_client


@pytest.fixture(autouse=True)
def mock_get_credentials(monkeypatch):
from pandas_gbq import auth
Expand Down
19 changes: 19 additions & 0 deletions tests/unit/test_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
@pytest.fixture(autouse=True)
def fresh_bigquery_version(monkeypatch):
    """Clear cached version probes so each test re-detects library versions."""
    monkeypatch.setattr(FEATURES, "_pandas_installed_version", None)
    monkeypatch.setattr(FEATURES, "_bigquery_installed_version", None)


@pytest.mark.parametrize(
Expand All @@ -28,3 +29,21 @@ def test_bigquery_has_from_dataframe_with_csv(monkeypatch, bigquery_version, exp

monkeypatch.setattr(google.cloud.bigquery, "__version__", bigquery_version)
assert FEATURES.bigquery_has_from_dataframe_with_csv == expected


@pytest.mark.parametrize(
    "pandas_version, expected",
    [
        ("0.14.7", False),
        ("0.22.1", False),
        ("0.23.0", True),
        ("0.23.1", True),
        ("1.0.0", True),
        ("2.1.3", True),
    ],
)
def test_pandas_has_deprecated_verbose(monkeypatch, pandas_version, expected):
    """The ``verbose`` parameter is deprecated from pandas 0.23.0 onward."""
    import pandas

    monkeypatch.setattr(pandas, "__version__", pandas_version)
    assert FEATURES.pandas_has_deprecated_verbose == expected
Loading

0 comments on commit 3ae5d4c

Please sign in to comment.