diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index bf67ff6525005..93a77869a39ba 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -371,7 +371,7 @@ Missing ^^^^^^^ - Bug in :class:`Grouper` now correctly propagates ``dropna`` argument and :meth:`DataFrameGroupBy.transform` now correctly handles missing values for ``dropna=True`` (:issue:`35612`) -- +- Bug in :func:`isna`, :meth:`Series.isna`, :meth:`Index.isna`, and :meth:`DataFrame.isna` (and the corresponding ``notna`` functions) not recognizing ``Decimal("NaN")`` objects (:issue:`39409`) - MultiIndex diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index f6f9e7410d34c..d6a3d18f711d0 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -1,3 +1,4 @@ +from decimal import Decimal import numbers import cython @@ -36,6 +37,8 @@ cdef: bint is_32bit = not IS64 + type cDecimal = Decimal # for faster isinstance checks + cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False): """ @@ -86,6 +89,8 @@ cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False and util.is_timedelta64_object(right) and get_timedelta64_value(right) == NPY_NAT ) + elif is_decimal_na(left): + return is_decimal_na(right) return False @@ -113,7 +118,18 @@ cpdef bint checknull(object val): The difference between `checknull` and `checknull_old` is that `checknull` does *not* consider INF or NEGINF to be NA. """ - return val is C_NA or is_null_datetimelike(val, inat_is_null=False) + return ( + val is C_NA + or is_null_datetimelike(val, inat_is_null=False) + or is_decimal_na(val) + ) + + +cdef inline bint is_decimal_na(object val): + """ + Is this a decimal.Decimal object Decimal("NAN"). 
+ """ + return isinstance(val, cDecimal) and val != val cpdef bint checknull_old(object val): diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 6b88bd26627b0..4658aa7e3cfd4 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -2,6 +2,7 @@ import collections from datetime import datetime +from decimal import Decimal from functools import wraps import operator import os @@ -146,7 +147,7 @@ + BYTES_DTYPES ) -NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA] +NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA, Decimal("NaN")] EMPTY_STRING_PATTERN = re.compile("^$") diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 829472f24852a..cfb2f722e3d8b 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -7,6 +7,7 @@ import numpy as np from pandas._libs.lib import no_default +from pandas._libs.missing import is_matching_na import pandas._libs.testing as _testing from pandas.core.dtypes.common import ( @@ -457,22 +458,8 @@ def assert_attr_equal(attr: str, left, right, obj: str = "Attributes"): if left_attr is right_attr: return True - elif ( - is_number(left_attr) - and np.isnan(left_attr) - and is_number(right_attr) - and np.isnan(right_attr) - ): - # np.nan - return True - elif ( - isinstance(left_attr, (np.datetime64, np.timedelta64)) - and isinstance(right_attr, (np.datetime64, np.timedelta64)) - and type(left_attr) is type(right_attr) - and np.isnat(left_attr) - and np.isnat(right_attr) - ): - # np.datetime64("nat") or np.timedelta64("nat") + elif is_matching_na(left_attr, right_attr): + # e.g. both np.nan, both NaT, both pd.NA, ... return True try: diff --git a/pandas/conftest.py b/pandas/conftest.py index ce572e42abec6..426cbf6a65aa5 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -304,7 +304,7 @@ def nselect_method(request): # ---------------------------------------------------------------- # Missing values & co. 
# ---------------------------------------------------------------- -@pytest.fixture(params=tm.NULL_OBJECTS, ids=str) +@pytest.fixture(params=tm.NULL_OBJECTS, ids=lambda x: type(x).__name__) def nulls_fixture(request): """ Fixture for each null type in pandas. diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 3279007fcebe1..642d35977ae36 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -1,6 +1,7 @@ """ missing types & inference """ +from decimal import Decimal from functools import partial import numpy as np @@ -610,20 +611,24 @@ def is_valid_na_for_dtype(obj, dtype: DtypeObj) -> bool: """ if not lib.is_scalar(obj) or not isna(obj): return False - if dtype.kind == "M": + elif dtype.kind == "M": if isinstance(dtype, np.dtype): # i.e. not tzaware - return not isinstance(obj, np.timedelta64) + return not isinstance(obj, (np.timedelta64, Decimal)) # we have to rule out tznaive dt64("NaT") - return not isinstance(obj, (np.timedelta64, np.datetime64)) - if dtype.kind == "m": - return not isinstance(obj, np.datetime64) - if dtype.kind in ["i", "u", "f", "c"]: + return not isinstance(obj, (np.timedelta64, np.datetime64, Decimal)) + elif dtype.kind == "m": + return not isinstance(obj, (np.datetime64, Decimal)) + elif dtype.kind in ["i", "u", "f", "c"]: # Numeric return obj is not NaT and not isinstance(obj, (np.datetime64, np.timedelta64)) + elif dtype == np.dtype(object): + # This is needed for Categorical, but is kind of weird + return True + # must be PeriodDType - return not isinstance(obj, (np.datetime64, np.timedelta64)) + return not isinstance(obj, (np.datetime64, np.timedelta64, Decimal)) def isna_all(arr: ArrayLike) -> bool: diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 786944816bcf6..4c0d417a975c0 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -3,6 +3,7 @@ """ import datetime +from decimal 
import Decimal import numpy as np import pytest @@ -538,7 +539,20 @@ def test_maybe_promote_any_numpy_dtype_with_na(any_numpy_dtype_reduced, nulls_fi fill_value = nulls_fixture dtype = np.dtype(any_numpy_dtype_reduced) - if is_integer_dtype(dtype) and fill_value is not NaT: + if isinstance(fill_value, Decimal): + # Subject to change, but ATM (When Decimal(NAN) is being added to nulls_fixture) + # this is the existing behavior in maybe_promote, + # hinges on is_valid_na_for_dtype + if dtype.kind in ["i", "u", "f", "c"]: + if dtype.kind in ["i", "u"]: + expected_dtype = np.dtype(np.float64) + else: + expected_dtype = dtype + exp_val_for_scalar = np.nan + else: + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + elif is_integer_dtype(dtype) and fill_value is not NaT: # integer + other missing value (np.nan / None) casts to float expected_dtype = np.float64 exp_val_for_scalar = np.nan diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 57efe8e4840f1..ecd56b5b61244 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -317,6 +317,43 @@ def test_period(self): tm.assert_series_equal(isna(s), exp) tm.assert_series_equal(notna(s), ~exp) + def test_decimal(self): + # scalars GH#23530 + a = Decimal(1.0) + assert pd.isna(a) is False + assert pd.notna(a) is True + + b = Decimal("NaN") + assert pd.isna(b) is True + assert pd.notna(b) is False + + # array + arr = np.array([a, b]) + expected = np.array([False, True]) + result = pd.isna(arr) + tm.assert_numpy_array_equal(result, expected) + + result = pd.notna(arr) + tm.assert_numpy_array_equal(result, ~expected) + + # series + ser = Series(arr) + expected = Series(expected) + result = pd.isna(ser) + tm.assert_series_equal(result, expected) + + result = pd.notna(ser) + tm.assert_series_equal(result, ~expected) + + # index + idx = pd.Index(arr) + expected = np.array([False, True]) + result = pd.isna(idx) + 
tm.assert_numpy_array_equal(result, expected) + + result = pd.notna(idx) + tm.assert_numpy_array_equal(result, ~expected) + @pytest.mark.parametrize("dtype_equal", [True, False]) def test_array_equivalent(dtype_equal): @@ -619,24 +656,22 @@ def test_empty_like(self): class TestLibMissing: - def test_checknull(self): - for value in na_vals: - assert libmissing.checknull(value) + @pytest.mark.parametrize("func", [libmissing.checknull, isna]) + def test_checknull(self, func): + for value in na_vals + sometimes_na_vals: + assert func(value) for value in inf_vals: - assert not libmissing.checknull(value) + assert not func(value) for value in int_na_vals: - assert not libmissing.checknull(value) - - for value in sometimes_na_vals: - assert not libmissing.checknull(value) + assert not func(value) for value in never_na_vals: - assert not libmissing.checknull(value) + assert not func(value) def test_checknull_old(self): - for value in na_vals: + for value in na_vals + sometimes_na_vals: assert libmissing.checknull_old(value) for value in inf_vals: @@ -645,9 +680,6 @@ def test_checknull_old(self): for value in int_na_vals: assert not libmissing.checknull_old(value) - for value in sometimes_na_vals: - assert not libmissing.checknull_old(value) - for value in never_na_vals: assert not libmissing.checknull_old(value) @@ -682,6 +714,9 @@ def test_is_matching_na(self, nulls_fixture, nulls_fixture2): elif is_float(left) and is_float(right): # np.nan vs float("NaN") we consider as matching assert libmissing.is_matching_na(left, right) + elif type(left) is type(right): + # e.g. 
both Decimal("NaN") + assert libmissing.is_matching_na(left, right) else: assert not libmissing.is_matching_na(left, right) diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 05a28f20b956a..5bf26e2ca476e 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -48,7 +48,8 @@ def test_contains(self, data, data_missing): # the data can never contain other nan-likes than na_value for na_value_obj in tm.NULL_OBJECTS: - if na_value_obj is na_value: + if na_value_obj is na_value or type(na_value_obj) == type(na_value): + # type check for e.g. two instances of Decimal("NAN") continue assert na_value_obj not in data assert na_value_obj not in data_missing diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 23b1ce250a5e5..437160e78741b 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -178,13 +178,6 @@ class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests): class TestMethods(BaseDecimal, base.BaseMethodsTests): @pytest.mark.parametrize("dropna", [True, False]) def test_value_counts(self, all_data, dropna, request): - if any(x != x for x in all_data): - mark = pytest.mark.xfail( - reason="tm.assert_series_equal incorrectly raises", - raises=AssertionError, - ) - request.node.add_marker(mark) - all_data = all_data[:10] if dropna: other = np.array(all_data[~all_data.isna()]) @@ -212,12 +205,6 @@ class TestCasting(BaseDecimal, base.BaseCastingTests): class TestGroupby(BaseDecimal, base.BaseGroupbyTests): - def test_groupby_apply_identity(self, data_for_grouping, request): - if any(x != x for x in data_for_grouping): - mark = pytest.mark.xfail(reason="tm.assert_series_equal raises incorrectly") - request.node.add_marker(mark) - super().test_groupby_apply_identity(data_for_grouping) - def test_groupby_agg_extension(self, data_for_grouping): 
super().test_groupby_agg_extension(data_for_grouping) diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py index de0850d37034d..4fba4b13835b3 100644 --- a/pandas/tests/indexes/test_index_new.py +++ b/pandas/tests/indexes/test_index_new.py @@ -1,6 +1,8 @@ """ Tests for the Index constructor conducting inference. """ +from decimal import Decimal + import numpy as np import pytest @@ -89,6 +91,10 @@ def test_constructor_infer_periodindex(self): def test_constructor_infer_nat_dt_like( self, pos, klass, dtype, ctor, nulls_fixture, request ): + if isinstance(nulls_fixture, Decimal): + # We don't cast these to datetime64/timedelta64 + return + expected = klass([NaT, NaT]) assert expected.dtype == dtype data = [ctor] diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 74c961418176b..99dadfba4e7aa 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -104,12 +104,14 @@ def test_numeric_compat(self): def test_insert_na(self, nulls_fixture): # GH 18295 (test missing) index = self.create_index() + na_val = nulls_fixture - if nulls_fixture is pd.NaT: + if na_val is pd.NaT: expected = Index([index[0], pd.NaT] + list(index[1:]), dtype=object) else: expected = Float64Index([index[0], np.nan] + list(index[1:])) - result = index.insert(1, nulls_fixture) + + result = index.insert(1, na_val) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index bfce694637579..89248447c98d3 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1,5 +1,6 @@ import datetime from datetime import timedelta +from decimal import Decimal from io import StringIO import json import os @@ -1742,8 +1743,12 @@ def test_json_pandas_na(self): result = DataFrame([[pd.NA]]).to_json() assert result == '{"0":{"0":null}}' - def test_json_pandas_nulls(self, nulls_fixture): + def 
test_json_pandas_nulls(self, nulls_fixture, request): # GH 31615 + if isinstance(nulls_fixture, Decimal): + mark = pytest.mark.xfail(reason="not implemented") + request.node.add_marker(mark) + result = DataFrame([[nulls_fixture]]).to_json() assert result == '{"0":{"0":null}}' diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 695aa4ca129d8..2b65655e7bdad 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -6,6 +6,7 @@ datetime, timedelta, ) +from decimal import Decimal import locale from dateutil.parser import parse @@ -2446,9 +2447,15 @@ def test_nullable_integer_to_datetime(): @pytest.mark.parametrize("klass", [np.array, list]) def test_na_to_datetime(nulls_fixture, klass): - result = pd.to_datetime(klass([nulls_fixture])) - assert result[0] is pd.NaT + if isinstance(nulls_fixture, Decimal): + with pytest.raises(TypeError, match="not convertible to datetime"): + pd.to_datetime(klass([nulls_fixture])) + + else: + result = pd.to_datetime(klass([nulls_fixture])) + + assert result[0] is pd.NaT def test_empty_string_datetime_coerce__format(): diff --git a/pandas/tests/util/test_assert_attr_equal.py b/pandas/tests/util/test_assert_attr_equal.py index 6fad38c2cd44e..115ef58e085cc 100644 --- a/pandas/tests/util/test_assert_attr_equal.py +++ b/pandas/tests/util/test_assert_attr_equal.py @@ -25,6 +25,9 @@ def test_assert_attr_equal_different_nulls(nulls_fixture, nulls_fixture2): elif is_float(nulls_fixture) and is_float(nulls_fixture2): # we consider float("nan") and np.float64("nan") to be equivalent assert tm.assert_attr_equal("na_value", obj, obj2) + elif type(nulls_fixture) is type(nulls_fixture2): + # e.g. 
Decimal("NaN") + assert tm.assert_attr_equal("na_value", obj, obj2) else: with pytest.raises(AssertionError, match='"na_value" are different'): tm.assert_attr_equal("na_value", obj, obj2) diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index e3384ce3caa06..2ebc6e17ba497 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -336,3 +336,13 @@ def test_allows_duplicate_labels(): with pytest.raises(AssertionError, match="