fix: change the to_numpy default for na_value from None to pandas' no_default for numpy conversions (#1766)

sycai · web-flow · commit 0629cac7f9a9 · 2025-05-29T10:54:47.000-07:00
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
@@ -47,6 +47,7 @@
 import google.cloud.bigquery as bigquery
 import numpy
 import pandas
+from pandas.api import extensions as pd_ext
 import pandas.io.formats.format
 import pyarrow
 import tabulate
@@ -4097,7 +4098,7 @@ def to_numpy(
         self,
         dtype=None,
         copy=False,
-        na_value=None,
+        na_value=pd_ext.no_default,
         *,
         allow_large_results=None,
         **kwargs,
diff --git a/bigframes/series.py b/bigframes/series.py
@@ -42,6 +42,7 @@
 import google.cloud.bigquery as bigquery
 import numpy
 import pandas
+from pandas.api import extensions as pd_ext
 import pandas.core.dtypes.common
 import pyarrow as pa
 import typing_extensions
@@ -2109,7 +2110,7 @@ def to_numpy(
         self,
         dtype=None,
         copy=False,
-        na_value=None,
+        na_value=pd_ext.no_default,
         *,
         allow_large_results=None,
         **kwargs,
diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py
@@ -15,6 +15,8 @@
 from typing import Tuple
 
 import google.api_core.exceptions
+import numpy
+import numpy.testing
 import pandas as pd
 import pandas.testing
 import pyarrow as pa
@@ -1061,3 +1063,12 @@ def test_to_sql_query_named_index_excluded(
     utils.assert_pandas_df_equal(
         roundtrip.to_pandas(), pd_df, check_index_type=False, ignore_order=True
     )
+
+
+def test_to_numpy(scalars_dfs):
+    bf_df, pd_df = scalars_dfs
+
+    bf_result = numpy.array(bf_df[["int64_too"]], dtype="int64")
+    pd_result = numpy.array(pd_df[["int64_too"]], dtype="int64")
+
+    numpy.testing.assert_array_equal(bf_result, pd_result)
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
@@ -2132,7 +2132,7 @@ def test_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep, col_na
     ],
 )
 def test_unique(scalars_df_index, scalars_pandas_df_index, col_name):
-    bf_uniq = scalars_df_index[col_name].unique().to_numpy()
+    bf_uniq = scalars_df_index[col_name].unique().to_numpy(na_value=None)
     pd_uniq = scalars_pandas_df_index[col_name].unique()
     numpy.array_equal(pd_uniq, bf_uniq)
 
diff --git a/tests/system/small/test_series_io.py b/tests/system/small/test_series_io.py
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import numpy
+import numpy.testing
 import pandas as pd
 import pytest
 
@@ -114,3 +116,12 @@ def test_to_pandas_batches(scalars_dfs, page_size, max_results, allow_large_resu
         total_rows += actual_rows
 
     assert total_rows == expected_total_rows
+
+
+def test_to_numpy(scalars_dfs):
+    bf_df, pd_df = scalars_dfs
+
+    bf_result = numpy.array(bf_df["int64_too"], dtype="int64")
+    pd_result = numpy.array(pd_df["int64_too"], dtype="int64")
+
+    numpy.testing.assert_array_equal(bf_result, pd_result)
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -17,6 +17,7 @@
 import bigframes_vendored.pandas.core.generic as generic
 import numpy as np
 import pandas as pd
+from pandas.api import extensions as pd_ext
 
 # -----------------------------------------------------------------------
 # DataFrame class
@@ -369,7 +370,7 @@ def to_numpy(
         self,
         dtype=None,
         copy=False,
-        na_value=None,
+        na_value=pd_ext.no_default,
         *,
         allow_large_results=None,
         **kwargs,
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
@@ -19,8 +19,8 @@
 from bigframes_vendored.pandas.core.generic import NDFrame
 import numpy
 import numpy as np
-from pandas._libs import lib
 from pandas._typing import Axis, FilePath, NaPosition, WriteBuffer
+from pandas.api import extensions as pd_ext
 
 from bigframes import constants
 
@@ -323,7 +323,7 @@ def reset_index(
         self,
         *,
         drop: bool = False,
-        name=lib.no_default,
+        name=pd_ext.no_default,
     ) -> DataFrame | Series | None:
         """
         Generate a new DataFrame or Series with the index reset.
@@ -730,7 +730,9 @@ def tolist(self, *, allow_large_results: Optional[bool] = None) -> list:
 
     to_list = tolist
 
-    def to_numpy(self, dtype, copy=False, na_value=None, *, allow_large_results=None):
+    def to_numpy(
+        self, dtype, copy=False, na_value=pd_ext.no_default, *, allow_large_results=None
+    ):
         """
         A NumPy ndarray representing the values in this Series or Index.
 

Original file line number	Diff line number	Diff line change
`@@ -2132,7 +2132,7 @@ def test_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep, col_na`
`2132`	`2132`	`],`
`2133`	`2133`	`)`
`2134`	`2134`	`def test_unique(scalars_df_index, scalars_pandas_df_index, col_name):`
`2135`		`- bf_uniq = scalars_df_index[col_name].unique().to_numpy()`
	`2135`	`+ bf_uniq = scalars_df_index[col_name].unique().to_numpy(na_value=None)`
`2136`	`2136`	`pd_uniq = scalars_pandas_df_index[col_name].unique()`
`2137`	`2137`	`numpy.array_equal(pd_uniq, bf_uniq)`
`2138`	`2138`