googleapis · plamut · Jul 12, 2021 · Jun 28, 2021 · Jul 12, 2021 · Jul 12, 2021
@@ -780,7 +780,7 @@ def dataframe_to_json_generator(dataframe):
         output = {}
         for column, value in zip(dataframe.columns, row):
             # Omit NaN values.
-            if value != value:
+            if pandas.isna(value):
                 continue
             output[column] = value
         yield output
@@ -19,6 +19,7 @@
 import operator
 import queue
 import warnings
+import pkg_resources
 
 import mock
 
@@ -47,6 +48,14 @@
 except ImportError:  # pragma: NO COVER
     bigquery_storage = None
 
+PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0")
+
+if pandas is not None:
+    PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version
+else:
+    # Set to less than MIN version.
+    PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0")
+
 
 skip_if_no_bignumeric = pytest.mark.skipif(
     not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0",
@@ -734,6 +743,37 @@ def test_list_columns_and_indexes_with_named_index_same_as_column_name(
     assert columns_and_indexes == expected
 
 
+@pytest.mark.skipif(
+    pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION,
+    reason="Requires `pandas version >= 1.0.0` which introduces pandas.NA",
+)
+def test_dataframe_to_json_generator(module_under_test):
+    utcnow = datetime.datetime.utcnow()
+    df_data = collections.OrderedDict(
+        [
+            ("a_series", [pandas.NA, 2, 3, 4]),
+            ("b_series", [0.1, float("NaN"), 0.3, 0.4]),
+            ("c_series", ["a", "b", pandas.NA, "d"]),
+            ("d_series", [utcnow, utcnow, utcnow, pandas.NaT]),
+            ("e_series", [True, False, True, None]),
+        ]
+    )
+    dataframe = pandas.DataFrame(
+        df_data, index=pandas.Index([4, 5, 6, 7], name="a_index")
+    )
+
+    dataframe = dataframe.astype({"a_series": pandas.Int64Dtype()})
+
+    rows = module_under_test.dataframe_to_json_generator(dataframe)
+    expected = [
+        {"b_series": 0.1, "c_series": "a", "d_series": utcnow, "e_series": True},
+        {"a_series": 2, "c_series": "b", "d_series": utcnow, "e_series": False},
+        {"a_series": 3, "b_series": 0.3, "d_series": utcnow, "e_series": True},
+        {"a_series": 4, "b_series": 0.4, "c_series": "d"},
+    ]
+    assert list(rows) == expected
+
+
 @pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
 def test_list_columns_and_indexes_with_named_index(module_under_test):
     df_data = collections.OrderedDict(