From ebdc68382a43cf2ba9c53a4e10fbb2e55e02f386 Mon Sep 17 00:00:00 2001 From: Christian Juncker Braedstrup Date: Mon, 28 Jun 2021 14:26:52 +0200 Subject: [PATCH 1/4] fix: use pandas function to check for NaN Starting with pandas 1.0, an experimental pandas.NA value (singleton) is available to represent scalar missing values as opposed to numpy.nan. Comparing the variable with itself results in a pandas.NA value that doesn't support type-casting to boolean. Using the built-in pandas.isna function handles all pandas-supported NaN values. --- google/cloud/bigquery/_pandas_helpers.py | 2 +- tests/unit/test__pandas_helpers.py | 29 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index e93a99eba..285c0e83c 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -780,7 +780,7 @@ def dataframe_to_json_generator(dataframe): output = {} for column, value in zip(dataframe.columns, row): # Omit NaN values. 
- if value != value: + if pandas.isna(value): continue output[column] = value yield output diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 39a3d845b..eda956479 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -734,6 +734,35 @@ def test_list_columns_and_indexes_with_named_index_same_as_column_name( assert columns_and_indexes == expected +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_json_generator(module_under_test): + utcnow = datetime.datetime.utcnow() + df_data = collections.OrderedDict( + [ + ("a_series", [pandas.NA, 2, 3, 4]), + ("b_series", [0.1, float("NaN"), 0.3, 0.4]), + ("c_series", ["a", "b", pandas.NA, "d"]), + ("d_series", [utcnow, utcnow, utcnow, pandas.NaT]), + ("e_series", [True, False, True, None]), + ] + ) + dataframe = pandas.DataFrame( + df_data, index=pandas.Index([4, 5, 6, 7], name="a_index") + ) + + dataframe = dataframe.astype({"a_series": pandas.Int64Dtype()}) + + rows = module_under_test.dataframe_to_json_generator(dataframe) + expected = [ + {"b_series": 0.1, "c_series": "a", "d_series": utcnow, "e_series": True}, + {"a_series": 2, "c_series": "b", "d_series": utcnow, "e_series": False}, + {"a_series": 3, "b_series": 0.3, "d_series": utcnow, "e_series": True}, + {"a_series": 4, "b_series": 0.4, "c_series": "d"}, + ] + for row, expect in zip(rows, expected): + assert row == expect + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_list_columns_and_indexes_with_named_index(module_under_test): df_data = collections.OrderedDict( From f86d103d3d9275fa0c4a515eb4aadc32101907cc Mon Sep 17 00:00:00 2001 From: Christian Juncker Braedstrup Date: Mon, 12 Jul 2021 13:50:27 +0200 Subject: [PATCH 2/4] tests: Skip tests if pandas below required version --- tests/unit/test__pandas_helpers.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/unit/test__pandas_helpers.py 
b/tests/unit/test__pandas_helpers.py index eda956479..0550b8c1c 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -19,6 +19,7 @@ import operator import queue import warnings +import pkg_resources import mock @@ -47,6 +48,14 @@ except ImportError: # pragma: NO COVER bigquery_storage = None +PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") + +if pandas is not None: + PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +else: + # Set to less than MIN version. + PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + skip_if_no_bignumeric = pytest.mark.skipif( not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0", @@ -734,7 +743,10 @@ def test_list_columns_and_indexes_with_named_index_same_as_column_name( assert columns_and_indexes == expected -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipIf( + pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, + reason="Requires `pandas version >= 1.0.0` which introduces pandas.NA", +) def test_dataframe_to_json_generator(module_under_test): utcnow = datetime.datetime.utcnow() df_data = collections.OrderedDict( From 659e9f74e4f6b480e823b24eb8fc842ab42a2956 Mon Sep 17 00:00:00 2001 From: Christian Juncker Braedstrup Date: Mon, 12 Jul 2021 14:11:43 +0200 Subject: [PATCH 3/4] tests: compare expected and actual directly as lists --- tests/unit/test__pandas_helpers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 0550b8c1c..e52095569 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -771,8 +771,7 @@ def test_dataframe_to_json_generator(module_under_test): {"a_series": 3, "b_series": 0.3, "d_series": utcnow, "e_series": True}, {"a_series": 4, "b_series": 0.4, "c_series": "d"}, ] - for row, expect in zip(rows, expected): - assert row 
== expect + assert list(rows) == expected @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") From dc0529b3ede930dd90bd9f0ace1b9724043adc12 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 12 Jul 2021 17:41:11 +0200 Subject: [PATCH 4/4] Fix pytest.mark.skipif spelling --- tests/unit/test__pandas_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index e52095569..aa87e28f5 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -743,7 +743,7 @@ def test_list_columns_and_indexes_with_named_index_same_as_column_name( assert columns_and_indexes == expected -@pytest.mark.skipIf( +@pytest.mark.skipif( pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, reason="Requires `pandas version >= 1.0.0` which introduces pandas.NA", )