Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: improve to_gbq logic unit test coverage #449

Merged
merged 42 commits into from
Dec 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
9a9d3fd
feat: accepts a table ID, which downloads the table without a query
tswast Dec 6, 2021
6adf233
add todo for next steps
tswast Dec 6, 2021
73a791a
Merge remote-tracking branch 'upstream/main' into issue266-read_gbq-n…
tswast Dec 9, 2021
9b1eb0d
add unit test for table ID read_gbq
tswast Dec 9, 2021
ec9ddaf
add helper for is_query
tswast Dec 9, 2021
9cc7c74
implement read_gbq with table id
tswast Dec 10, 2021
dd51ad8
fix remaining tests, don't localalize out-of-bounds timestamp columns
tswast Dec 10, 2021
e1ad679
Update pandas_gbq/gbq.py
tswast Dec 10, 2021
d29bc2a
fix 3.7 unit tests
tswast Dec 10, 2021
cb8f24f
correct coverage
tswast Dec 10, 2021
56b73b2
skip coverage for optional test skip
tswast Dec 10, 2021
8a61e97
fix docs build
tswast Dec 10, 2021
3f7900b
improve test coverage for error case
tswast Dec 10, 2021
3c53f1f
as of google-cloud-bigquery 1.11.0, get_table before list_rows is unn…
tswast Dec 13, 2021
5ce125f
tests with whitespace
tswast Dec 20, 2021
ea660f4
type annotations
tswast Dec 20, 2021
3d93c78
test: improve unit test coverage
tswast Dec 20, 2021
93e872e
boost coverage
tswast Dec 20, 2021
e0ae455
🦉 Updates from OwlBot
gcf-owl-bot[bot] Dec 20, 2021
28b72f0
boost coverage
tswast Dec 21, 2021
ed61f6d
Merge remote-tracking branch 'upstream/issue392-unit-test-100' into i…
tswast Dec 21, 2021
1d6831f
🦉 Updates from OwlBot
gcf-owl-bot[bot] Dec 21, 2021
fcf8276
finish coverage for load.py
tswast Dec 21, 2021
dfb107e
Merge branch 'issue392-unit-test-100' of github.com:googleapis/python…
tswast Dec 21, 2021
76b38a3
another test
tswast Dec 21, 2021
2fd1e32
refactor gbq tests
tswast Dec 21, 2021
d97102e
less intense refactoring
tswast Dec 22, 2021
c1f8055
Merge remote-tracking branch 'upstream/main' into issue392-unit-test-100
tswast Dec 22, 2021
c48f997
more refactor cleanup
tswast Dec 22, 2021
9e67138
more tests
tswast Dec 22, 2021
83c4513
add to_gbq tests
tswast Dec 23, 2021
4f12c78
boost coverage
tswast Dec 23, 2021
4fedaaf
🦉 Updates from OwlBot
gcf-owl-bot[bot] Dec 23, 2021
2bfd5a1
cover new properties
tswast Dec 23, 2021
24574a8
Merge branch 'issue392-unit-test-100' of github.com:googleapis/python…
tswast Dec 23, 2021
3cb788e
unknown if_exists
tswast Dec 23, 2021
95f0478
add session for no deps
tswast Dec 23, 2021
0f9baa8
🦉 Updates from OwlBot
gcf-owl-bot[bot] Dec 23, 2021
7ac9b6a
remove system test for private delete method
tswast Dec 28, 2021
d562ee9
check number of columns
tswast Dec 28, 2021
5599915
coverage dropped due to removed code
tswast Dec 28, 2021
5223fa4
🦉 Updates from OwlBot
gcf-owl-bot[bot] Dec 28, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ omit =
google/cloud/__init__.py

[report]
fail_under = 89
fail_under = 94
show_missing = True
exclude_lines =
# Re-enable the standard pragma
Expand Down
2 changes: 1 addition & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def cover(session):
test runs (not system test runs), and then erases coverage data.
"""
session.install("coverage", "pytest-cov")
session.run("coverage", "report", "--show-missing", "--fail-under=89")
session.run("coverage", "report", "--show-missing", "--fail-under=94")

session.run("coverage", "erase")

Expand Down
2 changes: 1 addition & 1 deletion owlbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
templated_files = common.py_library(
unit_test_python_versions=["3.7", "3.8", "3.9", "3.10"],
system_test_python_versions=["3.7", "3.8", "3.9", "3.10"],
cov_level=89,
cov_level=94,
unit_test_extras=extras,
system_test_extras=extras,
intersphinx_dependencies={
Expand Down
41 changes: 23 additions & 18 deletions pandas_gbq/gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,20 @@ class InvalidSchema(ValueError):
table in BigQuery.
"""

pass
def __init__(
self, message: str, local_schema: Dict[str, Any], remote_schema: Dict[str, Any]
plamut marked this conversation as resolved.
Show resolved Hide resolved
):
super().__init__(message)
self._local_schema = local_schema
self._remote_schema = remote_schema

@property
def local_schema(self) -> Dict[str, Any]:
return self._local_schema

@property
def remote_schema(self) -> Dict[str, Any]:
return self._remote_schema


class NotFoundException(ValueError):
Expand Down Expand Up @@ -354,19 +367,12 @@ def sizeof_fmt(num, suffix="B"):
return fmt % (num, "Y", suffix)

def get_client(self):
import google.api_core.client_info
import pandas

try:
# This module was added in google-api-core 1.11.0.
# We don't have a hard requirement on that version, so only
# populate the client_info if available.
import google.api_core.client_info

client_info = google.api_core.client_info.ClientInfo(
user_agent="pandas-{}".format(pandas.__version__)
)
except ImportError:
client_info = None
client_info = google.api_core.client_info.ClientInfo(
user_agent="pandas-{}".format(pandas.__version__)
)

# In addition to new enough version of google-api-core, a new enough
# version of google-cloud-bigquery is required to populate the
Expand Down Expand Up @@ -1057,7 +1063,7 @@ def to_gbq(
DeprecationWarning,
stacklevel=2,
)
elif api_method == "load_csv":
else:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's already a check above for known api_methods, so this was an impossible branch.

warnings.warn(
"chunksize will be ignored when using api_method='load_csv' in a future version of pandas-gbq",
PendingDeprecationWarning,
Expand Down Expand Up @@ -1122,12 +1128,14 @@ def to_gbq(
)
elif if_exists == "replace":
connector.delete_and_recreate_table(dataset_id, table_id, table_schema)
elif if_exists == "append":
else:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's already a check above for known if_exists, so this was an impossible branch.

if not pandas_gbq.schema.schema_is_subset(original_schema, table_schema):
raise InvalidSchema(
"Please verify that the structure and "
"data types in the DataFrame match the "
"schema of the destination table."
"schema of the destination table.",
table_schema,
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Towards #349

original_schema,
)

# Update the local `table_schema` so mode (NULLABLE/REQUIRED)
Expand Down Expand Up @@ -1283,9 +1291,6 @@ def delete(self, table_id):
"""
from google.api_core.exceptions import NotFound

if not self.exists(table_id):
raise NotFoundException("Table does not exist")

table_ref = self._table_ref(table_id)
try:
self.client.delete_table(table_ref)
Expand Down
7 changes: 6 additions & 1 deletion pandas_gbq/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,11 @@ def load_csv_from_file(
chunksize: Optional[int],
schema: Optional[Dict[str, Any]],
):
"""Manually encode a DataFrame to CSV and use the buffer in a load job.

This method is needed for writing with google-cloud-bigquery versions that
don't implement load_table_from_dataframe with the CSV serialization format.
"""
if schema is None:
schema = pandas_gbq.schema.generate_bq_schema(dataframe)

Expand All @@ -203,7 +208,7 @@ def load_chunk(chunk, job_config):
finally:
chunk_buffer.close()

return load_csv(dataframe, chunksize, bq_schema, load_chunk,)
return load_csv(dataframe, chunksize, bq_schema, load_chunk)


def load_chunks(
Expand Down
14 changes: 13 additions & 1 deletion pandas_gbq/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,19 @@ def to_pandas_gbq(client_schema):
"""Given a sequence of :class:`google.cloud.bigquery.schema.SchemaField`,
return a schema in pandas-gbq API format.
"""
remote_fields = [field_remote.to_api_repr() for field_remote in client_schema]
remote_fields = [
# Filter out default values. google-cloud-bigquery versions before
# 2.31.0 (https://github.com/googleapis/python-bigquery/pull/557)
# include a description key, even if not explicitly set. This has the
# potential to unset the description unintentionally in cases where
# pandas-gbq is updating the schema.
{
key: value
for key, value in field_remote.to_api_repr().items()
if value is not None
}
for field_remote in client_schema
]
for field in remote_fields:
field["type"] = field["type"].upper()
field["mode"] = field["mode"].upper()
Expand Down
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@
"pandas >=0.24.2",
"pyarrow >=3.0.0, <7.0dev",
"pydata-google-auth",
"google-auth",
"google-auth-oauthlib",
"google-api-core >=1.14.0",
"google-auth >=1.4.1",
"google-auth-oauthlib >=0.0.1",
# 2.4.* has a bug where waiting for the query can hang indefinitely.
# https://github.com/pydata/pandas-gbq/issues/343
"google-cloud-bigquery[bqstorage,pandas] >=1.11.1,<4.0.0dev,!=2.4.*",
Expand Down
1 change: 1 addition & 0 deletions testing/constraints-3.7.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
# Then this file should have foo==1.14.0
db-dtypes==0.3.1
google-api-core==1.14.0
google-auth==1.4.1
google-auth-oauthlib==0.0.1
google-cloud-bigquery==1.11.1
Expand Down
5 changes: 0 additions & 5 deletions tests/system/test_gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -1522,11 +1522,6 @@ def test_delete_table(gbq_table):
assert not gbq_table.exists("test_delete_table")


def test_delete_table_not_found(gbq_table):
with pytest.raises(gbq.NotFoundException):
gbq_table.delete("test_delete_table_not_found")


def test_create_table_data_dataset_does_not_exist(
project, credentials, gbq_dataset, random_dataset_id
):
Expand Down
59 changes: 30 additions & 29 deletions tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,36 @@
import pytest


def mock_get_credentials(*args, **kwargs):
import google.auth.credentials

mock_credentials = mock.create_autospec(google.auth.credentials.Credentials)
return mock_credentials, "default-project"


@pytest.fixture
def mock_service_account_credentials():
import google.oauth2.service_account

mock_credentials = mock.create_autospec(google.oauth2.service_account.Credentials)
return mock_credentials


@pytest.fixture
def mock_compute_engine_credentials():
import google.auth.compute_engine

mock_credentials = mock.create_autospec(google.auth.compute_engine.Credentials)
return mock_credentials


@pytest.fixture(autouse=True)
def no_auth(monkeypatch):
import pydata_google_auth

monkeypatch.setattr(pydata_google_auth, "default", mock_get_credentials)


@pytest.fixture(autouse=True, scope="function")
def reset_context():
import pandas_gbq
Expand All @@ -20,41 +50,12 @@ def reset_context():
@pytest.fixture(autouse=True)
def mock_bigquery_client(monkeypatch):
import google.cloud.bigquery
import google.cloud.bigquery.table

mock_client = mock.create_autospec(google.cloud.bigquery.Client)
# Constructor returns the mock itself, so this mock can be treated as the
# constructor or the instance.
mock_client.return_value = mock_client

mock_query = mock.create_autospec(google.cloud.bigquery.QueryJob)
mock_query.job_id = "some-random-id"
mock_query.state = "DONE"
mock_rows = mock.create_autospec(google.cloud.bigquery.table.RowIterator)
mock_rows.total_rows = 1

mock_rows.__iter__.return_value = [(1,)]
mock_query.result.return_value = mock_rows
mock_client.list_rows.return_value = mock_rows
mock_client.query.return_value = mock_query
# Mock table creation.
monkeypatch.setattr(google.cloud.bigquery, "Client", mock_client)
mock_client.reset_mock()

# Mock out SELECT 1 query results.
def generate_schema():
query = mock_client.query.call_args[0][0] if mock_client.query.call_args else ""
if query == "SELECT 1 AS int_col":
return [google.cloud.bigquery.SchemaField("int_col", "INTEGER")]
else:
return [google.cloud.bigquery.SchemaField("_f0", "INTEGER")]

type(mock_rows).schema = mock.PropertyMock(side_effect=generate_schema)

# Mock out get_table.
def get_table(table_ref_or_id, **kwargs):
return google.cloud.bigquery.Table(table_ref_or_id)

mock_client.get_table.side_effect = get_table
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was messing with the to_gbq tests, so moved to test_gbq.py (and potentially to a test_read_gbq.py in the future).


return mock_client
25 changes: 11 additions & 14 deletions tests/unit/test_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,35 +28,32 @@ def test_get_credentials_default_credentials(monkeypatch):
import google.auth
import google.auth.credentials
import google.cloud.bigquery
import pydata_google_auth

def mock_default_credentials(scopes=None, request=None):
return (
mock.create_autospec(google.auth.credentials.Credentials),
"default-project",
)
mock_user_credentials = mock.create_autospec(google.auth.credentials.Credentials)

def mock_default_credentials(scopes, **kwargs):
return (mock_user_credentials, "test-project")

monkeypatch.setattr(google.auth, "default", mock_default_credentials)
monkeypatch.setattr(pydata_google_auth, "default", mock_default_credentials)

credentials, project = auth.get_credentials()
assert project == "default-project"
assert project == "test-project"
assert credentials is not None


def test_get_credentials_load_user_no_default(monkeypatch):
import google.auth
import google.auth.credentials
import pydata_google_auth
import pydata_google_auth.cache

def mock_default_credentials(scopes=None, request=None):
return (None, None)

monkeypatch.setattr(google.auth, "default", mock_default_credentials)
mock_user_credentials = mock.create_autospec(google.auth.credentials.Credentials)

mock_cache = mock.create_autospec(pydata_google_auth.cache.CredentialsCache)
mock_cache.load.return_value = mock_user_credentials
def mock_default_credentials(scopes, **kwargs):
return (mock_user_credentials, None)

monkeypatch.setattr(auth, "get_credentials_cache", lambda _: mock_cache)
monkeypatch.setattr(pydata_google_auth, "default", mock_default_credentials)

credentials, project = auth.get_credentials()
assert project is None
Expand Down
16 changes: 16 additions & 0 deletions tests/unit/test_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,25 @@

from unittest import mock

import google.cloud.bigquery
import google.cloud.bigquery.table
import pytest


@pytest.fixture(autouse=True)
def default_bigquery_client(mock_bigquery_client):
mock_query = mock.create_autospec(google.cloud.bigquery.QueryJob)
mock_query.job_id = "some-random-id"
mock_query.state = "DONE"
mock_rows = mock.create_autospec(google.cloud.bigquery.table.RowIterator)
mock_rows.total_rows = 1
mock_rows.__iter__.return_value = [(1,)]
mock_query.result.return_value = mock_rows
mock_bigquery_client.list_rows.return_value = mock_rows
mock_bigquery_client.query.return_value = mock_query
return mock_bigquery_client


@pytest.fixture(autouse=True)
def mock_get_credentials(monkeypatch):
from pandas_gbq import auth
Expand Down
19 changes: 19 additions & 0 deletions tests/unit/test_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
@pytest.fixture(autouse=True)
def fresh_bigquery_version(monkeypatch):
monkeypatch.setattr(FEATURES, "_bigquery_installed_version", None)
monkeypatch.setattr(FEATURES, "_pandas_installed_version", None)


@pytest.mark.parametrize(
Expand All @@ -28,3 +29,21 @@ def test_bigquery_has_from_dataframe_with_csv(monkeypatch, bigquery_version, exp

monkeypatch.setattr(google.cloud.bigquery, "__version__", bigquery_version)
assert FEATURES.bigquery_has_from_dataframe_with_csv == expected


@pytest.mark.parametrize(
["pandas_version", "expected"],
[
("0.14.7", False),
("0.22.1", False),
("0.23.0", True),
("0.23.1", True),
("1.0.0", True),
("2.1.3", True),
],
)
def test_pandas_has_deprecated_verbose(monkeypatch, pandas_version, expected):
import pandas

monkeypatch.setattr(pandas, "__version__", pandas_version)
assert FEATURES.pandas_has_deprecated_verbose == expected
Loading