diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index c44e667d84308..c9615a6f8f4ec 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -708,6 +708,7 @@ Conversion
 - Bug in :func:`factorize` where, when given an array with a numeric numpy dtype lower than int64, uint64 and float64, the unique values did not keep their original dtype (:issue:`41132`)
 - Bug in :class:`DataFrame` construction with a dictionary containing an arraylike with ``ExtensionDtype`` and ``copy=True`` failing to make a copy (:issue:`38939`)
 - Bug in :meth:`qcut` raising error when taking ``Float64DType`` as input (:issue:`40730`)
+- Bug in :meth:`BaseMaskedArray.to_numpy` returning an object-dtype array by default for integer and floating arrays instead of a numeric NumPy dtype (:issue:`40630`)
 
 Strings
 ^^^^^^^
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index 11f9f645920ec..30b703bb4885d 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -32,6 +32,8 @@
 from pandas.core.dtypes.common import (
     is_dtype_equal,
+    is_float_dtype,
     is_integer,
+    is_integer_dtype,
     is_object_dtype,
     is_scalar,
     is_string_dtype,
@@ -244,7 +246,12 @@ def to_numpy(  # type: ignore[override]
 
         Examples
         --------
-        An object-dtype is the default result
+        Integer and floating arrays convert to a NumPy numeric dtype by
+        default; for other arrays an object-dtype is the default result
+
+        >>> a = pd.array([1, 2, 3], dtype="Int64")
+        >>> a.to_numpy()
+        array([1, 2, 3])
 
         >>> a = pd.array([True, False, pd.NA], dtype="boolean")
         >>> a.to_numpy()
@@ -280,10 +287,31 @@ def to_numpy(  # type: ignore[override]
         if na_value is lib.no_default:
             na_value = libmissing.NA
+        # Integer and floating arrays convert to a NumPy numeric dtype only when
+        # the caller asked for neither a dtype nor a missing-value marker.
+        # Boolean arrays and explicit requests keep the generic handling below.
+        numeric_default = (
+            dtype is None
+            and na_value is libmissing.NA
+            and (is_integer_dtype(self) or is_float_dtype(self))
+        )
         if dtype is None:
-            # error: Incompatible types in assignment (expression has type
-            # "Type[object]", variable has type "Union[str, dtype[Any], None]")
-            dtype = object  # type: ignore[assignment]
-        if self._hasna:
+            if numeric_default:
+                dtype = self.dtype.numpy_dtype
+            else:
+                # error: Incompatible types in assignment (expression has type
+                # "Type[object]", variable has type "Union[str, dtype[Any], None]")
+                dtype = object  # type: ignore[assignment]
+
+        if numeric_default:
+            # NaN is the only missing-value marker a numeric ndarray can hold,
+            # and integer dtypes cannot store it, so those widen to float64.
+            if self._hasna and is_integer_dtype(self):
+                dtype = np.dtype("float64")
+            data = self._data.astype(dtype)
+            if self._hasna:
+                data[self._mask] = np.nan
+
+        elif self._hasna:
             if (
                 not is_object_dtype(dtype)
                 and not is_string_dtype(dtype)
diff --git a/pandas/tests/arrays/test_numpy.py b/pandas/tests/arrays/test_numpy.py
index 753ec99e683e6..6b6453607ca7b 100644
--- a/pandas/tests/arrays/test_numpy.py
+++ b/pandas/tests/arrays/test_numpy.py
@@ -154,6 +154,21 @@ def test_to_numpy():
     tm.assert_numpy_array_equal(result, expected)
 
 
+@pytest.mark.parametrize(
+    "data_content,data_type,expected_result",
+    [
+        ([1, 2, 3], pd.Float64Dtype(), np.array([1, 2, 3], dtype=np.float64)),
+        ([1, 2, 3], "Int64", np.array([1, 2, 3], dtype=np.int64)),
+        ([1, 2, pd.NA], pd.Float64Dtype(), np.array([1, 2, np.nan], dtype=np.float64)),
+        ([1, 2, pd.NA], "Int64", np.array([1, 2, np.nan], dtype=np.float64)),
+    ],
+)
+def test_to_numpy_int_float(data_content, data_type, expected_result):
+    data = pd.Series(data_content, dtype=data_type)
+    actual_result = data.to_numpy()
+    tm.assert_numpy_array_equal(actual_result, expected_result)
+
+
 # ----------------------------------------------------------------------------
 # Setitem