-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BUG: Fix export .to_numpy() with nullable type #41196
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,8 @@ | |
from pandas.core.dtypes.common import ( | ||
is_dtype_equal, | ||
is_integer, | ||
is_integer_dtype, | ||
is_numeric_dtype, | ||
is_object_dtype, | ||
is_scalar, | ||
is_string_dtype, | ||
|
@@ -244,7 +246,12 @@ def to_numpy( # type: ignore[override] | |
|
||
Examples | ||
-------- | ||
An object-dtype is the default result | ||
For non-numeric input (anything other than int or float), the | ||
default result has object dtype. | ||
|
||
>>> a = pd.Series([1, 2, 3], dtype=pd.Int64Dtype()) | ||
>>> a.to_numpy() | ||
array([1, 2, 3], dtype=int64) | ||
|
||
>>> a = pd.array([True, False, pd.NA], dtype="boolean") | ||
>>> a.to_numpy() | ||
|
@@ -280,10 +287,27 @@ def to_numpy( # type: ignore[override] | |
if na_value is lib.no_default: | ||
na_value = libmissing.NA | ||
if dtype is None: | ||
# error: Incompatible types in assignment (expression has type | ||
# "Type[object]", variable has type "Union[str, dtype[Any], None]") | ||
dtype = object # type: ignore[assignment] | ||
if self._hasna: | ||
if is_numeric_dtype(self): | ||
dtype = self.dtype.numpy_dtype | ||
else: | ||
# error: Incompatible types in assignment (expression has type | ||
# "Type[object]", variable has type "Union[str, dtype[Any], None]") | ||
dtype = object # type: ignore[assignment] | ||
|
||
if is_numeric_dtype(self): | ||
|
||
# Integer dtypes cannot represent np.nan, so integer data that | ||
# contains NA is returned as float instead. | ||
if is_integer_dtype(self) and self._hasna: | ||
data = self._data.astype(float) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Doesn't this ignore any specified dtype? |
||
else: | ||
data = self._data.astype(dtype) | ||
|
||
# For numerical input, pd.NA is replaced with np.nan | ||
if self._hasna is True: | ||
data[np.where(self._mask is True)] = np.nan | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This line looks suspect ... There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also, what about the specified na_value? |
||
|
||
elif self._hasna: | ||
if ( | ||
not is_object_dtype(dtype) | ||
and not is_string_dtype(dtype) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -154,6 +154,21 @@ def test_to_numpy(): | |
tm.assert_numpy_array_equal(result, expected) | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"data_content,data_type,expected_result", | ||
[ | ||
([1, 2, 3], pd.Float64Dtype(), np.array([1, 2, 3], dtype=np.float64)), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why not build the Series here instead? I think just two parameters |
||
([1, 2, 3], "Int64", np.array([1, 2, 3], dtype=np.int64)), | ||
([1, 2, pd.NA], pd.Float64Dtype(), np.array([1, 2, np.nan], dtype=np.float64)), | ||
([1, 2, pd.NA], "Int64", np.array([1, 2, np.nan], dtype=np.float64)), | ||
], | ||
) | ||
def test_to_numpy_int_float(data_content, data_type, expected_result): | ||
data = pd.Series(data_content, dtype=data_type) | ||
actual_result = data.to_numpy() | ||
assert np.array_equal(actual_result, expected_result, equal_nan=True) | ||
|
||
|
||
# ---------------------------------------------------------------------------- | ||
# Setitem | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I find this sentence a bit difficult to understand, can you rephrase please?