diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 31f38ef0dca7b..6a73acd091844 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1166,6 +1166,7 @@ ExtensionArray ^^^^^^^^^^^^^^ - Bug in :class:`Categorical` when constructing with an :class:`Index` with :class:`ArrowDtype` (:issue:`60563`) - Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`) +- Bug in :meth:`.arrays.ArrowExtensionArray._cast_pointwise_result` causing ``Decimal("NaN")`` inputs to downcast results to ``null[pyarrow]`` instead of preserving decimal dtype (:issue:`62522`) - Bug in :meth:`ArrowExtensionArray.factorize` where NA values were dropped when input was dictionary-encoded even when dropna was set to False(:issue:`60567`) - Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`) - Bug in comparison between object with :class:`ArrowDtype` and incompatible-dtyped (e.g. string vs bool) incorrectly raising instead of returning all-``False`` (for ``==``) or all-``True`` (for ``!=``) (:issue:`59505`) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index b8dd44a58e8ec..3195016e5c4e9 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -4,6 +4,7 @@ date, datetime, ) +from decimal import Decimal import functools import operator from pathlib import Path @@ -481,6 +482,31 @@ def _cast_pointwise_result(self, values) -> ArrayLike: except pa.lib.ArrowInvalid: # e.g. test_combine_add if we can't cast pass + elif pa.types.is_null(arr.type): + # ``pa.array`` will produce null-dtype arrays when every value is a + # Decimal NaN. Try to preserve decimal storage by rebuilding the array + # with an explicit decimal type derived from the input values. + decimals = [val for val in values if isinstance(val, Decimal)] + if decimals and all( + isinstance(val, Decimal) or isna(val) for val in values + ): + decimal_type: pa.DataType | None = None + for dec in decimals: + if getattr(dec, "is_nan", None) and dec.is_nan(): + continue + try: + decimal_type = pa.scalar(dec).type + break + except pa.ArrowInvalid: + continue + if decimal_type is None: + # All decimals were NaN -> fall back to a wide decimal so we + # can retain the decimal dtype even though values stay null. + decimal_type = pa.decimal128(38, 18) + try: + arr = pa.array(values, type=decimal_type, from_pandas=True) + except (pa.ArrowInvalid, pa.ArrowTypeError): + pass if isinstance(self.dtype, StringDtype): if pa.types.is_string(arr.type) or pa.types.is_large_string(arr.type):