diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 6c8dd2f8da938..e20a6a04ce417 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1075,6 +1075,7 @@ ExtensionArray - Bug in :meth:`array.PandasArray.to_numpy` raising with ``NA`` value when ``na_value`` is specified (:issue:`40638`) - Bug in :meth:`api.types.is_numeric_dtype` where a custom :class:`ExtensionDtype` would not return ``True`` if ``_is_numeric`` returned ``True`` (:issue:`50563`) - Bug in :meth:`api.types.is_integer_dtype`, :meth:`api.types.is_unsigned_integer_dtype`, :meth:`api.types.is_signed_integer_dtype`, :meth:`api.types.is_float_dtype` where a custom :class:`ExtensionDtype` would not return ``True`` if ``kind`` returned the corresponding NumPy type (:issue:`50667`) +- Bug in :class:`Series` constructor unnecessarily overflowing for nullable unsigned integer dtypes (:issue:`38798`, :issue:`25880`) Styler ^^^^^^ diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py index 0a30c28acb090..6ae2c3a2e2749 100644 --- a/pandas/core/arrays/numeric.py +++ b/pandas/core/arrays/numeric.py @@ -168,6 +168,7 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype mask = mask.copy() return values, mask, dtype, inferred_type + original = values values = np.array(values, copy=copy) inferred_type = None if is_object_dtype(values.dtype) or is_string_dtype(values.dtype): @@ -204,6 +205,22 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype else: dtype = dtype.type + if is_integer_dtype(dtype) and is_float_dtype(values.dtype) and len(values) > 0: + if mask.all(): + values = np.ones(values.shape, dtype=dtype) + else: + idx = np.nanargmax(values) + if int(values[idx]) != original[idx]: + # We have ints that lost precision during the cast. + inferred_type = lib.infer_dtype(original, skipna=True) + if ( + inferred_type not in ["floating", "mixed-integer-float"] + and not mask.any() + ): + values = np.array(original, dtype=dtype, copy=False) + else: + values = np.array(original, dtype="object", copy=False) + # we copy as need to coerce here if mask.any(): values = values.copy() diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 05e40e20f1226..bd53e38f4ce28 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -49,6 +49,7 @@ import pandas._testing as tm from pandas.core.api import Int64Index from pandas.core.arrays import ( + IntegerArray, IntervalArray, period_array, ) @@ -2007,6 +2008,50 @@ def test_series_constructor_ea_int_from_string_bool(self): with pytest.raises(ValueError, match="invalid literal"): Series(["True", "False", "True", pd.NA], dtype="Int64") + @pytest.mark.parametrize("val", [1, 1.0]) + def test_series_constructor_overflow_uint_ea(self, val): + # GH#38798 + max_val = np.iinfo(np.uint64).max - 1 + result = Series([max_val, val], dtype="UInt64") + expected = Series(np.array([max_val, 1], dtype="uint64"), dtype="UInt64") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("val", [1, 1.0]) + def test_series_constructor_overflow_uint_ea_with_na(self, val): + # GH#38798 + max_val = np.iinfo(np.uint64).max - 1 + result = Series([max_val, val, pd.NA], dtype="UInt64") + expected = Series( + IntegerArray( + np.array([max_val, 1, 0], dtype="uint64"), + np.array([0, 0, 1], dtype=np.bool_), + ) + ) + tm.assert_series_equal(result, expected) + + def test_series_constructor_overflow_uint_with_nan(self): + # GH#38798 + max_val = np.iinfo(np.uint64).max - 1 + result = Series([max_val, np.nan], dtype="UInt64") + expected = Series( + IntegerArray( + np.array([max_val, 1], dtype="uint64"), + np.array([0, 1], dtype=np.bool_), + ) + ) + tm.assert_series_equal(result, expected) + + def test_series_constructor_ea_all_na(self): + # GH#38798 + result = Series([np.nan, np.nan], dtype="UInt64") + expected = Series( + IntegerArray( + np.array([1, 1], dtype="uint64"), + np.array([1, 1], dtype=np.bool_), + ) + ) + tm.assert_series_equal(result, expected) + class TestSeriesConstructorIndexCoercion: def test_series_constructor_datetimelike_index_coercion(self): diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 1c0a8301d65cc..d3701c30aa50c 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -4,8 +4,6 @@ from numpy import iinfo import pytest -from pandas.compat import is_platform_arm - import pandas as pd from pandas import ( DataFrame, @@ -755,13 +753,7 @@ def test_to_numeric_from_nullable_string(values, nullable_string_dtype, expected ([1.0, 1.1], "Float64", "signed", "Float64"), ([1, pd.NA], "Int64", "signed", "Int8"), ([450, -300], "Int64", "signed", "Int16"), - pytest.param( - [np.iinfo(np.uint64).max - 1, 1], - "UInt64", - "signed", - "UInt64", - marks=pytest.mark.xfail(not is_platform_arm(), reason="GH38798"), - ), + ([np.iinfo(np.uint64).max - 1, 1], "UInt64", "signed", "UInt64"), ([1, 1], "Int64", "unsigned", "UInt8"), ([1.0, 1.0], "Float32", "unsigned", "UInt8"), ([1.0, 1.1], "Float64", "unsigned", "Float64"),