From 81b18cac491a94fceb8ac6cf20b42fac2d51bf99 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 10 Dec 2019 13:25:21 -0600 Subject: [PATCH 1/5] Fixed IntegerArray division by 0 When dividing by 0, the result should be `inf`, not `NaN`. Closes https://github.com/pandas-dev/pandas/issues/27398 --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/arrays/integer.py | 7 +------ pandas/tests/arrays/test_integer.py | 11 +++++++++++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 8326976cbec8c..155505aa398e4 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -856,6 +856,7 @@ Other - Bug in :meth:`DataFrame.append` that raised ``IndexError`` when appending with empty list (:issue:`28769`) - Fix :class:`AbstractHolidayCalendar` to return correct results for years after 2030 (now goes up to 2200) (:issue:`27790`) +- Fixed :class:`IntegerArray` returning ``NA`` rather than ``inf`` for operations dividing by 0 (:issue:`27398`) - Bug in :meth:`Series.count` raises if use_inf_as_na is enabled (:issue:`29478`) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 08a3eca1e9055..d47e7e3df27e1 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -700,11 +700,6 @@ def _maybe_mask_result(self, result, mask, other, op_name): op_name : str """ - # may need to fill infs - # and mask wraparound - if is_float_dtype(result): - mask |= (result == np.inf) | (result == -np.inf) - # if we have a float operand we are by-definition # a float result # or our op is a divide @@ -748,7 +743,7 @@ def integer_arithmetic_method(self, other): # nans propagate if mask is None: - mask = self._mask + mask = self._mask.copy() else: mask = self._mask | mask diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 443a0c7e71616..a349b44c6baaa 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -339,6 +339,13 @@ def test_error(self, data, all_arithmetic_operators): with pytest.raises(NotImplementedError): opa(np.arange(len(s)).reshape(-1, len(s))) + def test_divide_by_zero(self): + # https://github.com/pandas-dev/pandas/issues/27398 + a = pd.array([0, 1, -1, None], dtype="Int64") + result = a / 0 + expected = np.array([np.nan, np.inf, -np.inf, np.nan]) + tm.assert_numpy_array_equal(result, expected) + def test_pow(self): # https://github.com/pandas-dev/pandas/issues/22022 a = integer_array([1, np.nan, np.nan, 1]) @@ -389,6 +396,10 @@ def test_compare_array(self, data, all_compare_operators): other = pd.Series([0] * len(data)) self._compare_other(data, op_name, other) + def test_no_shared_mask(self, data): + result = data + 1 + assert np.shares_memory(result._mask, data._mask) is False + class TestCasting: @pytest.mark.parametrize("dropna", [True, False]) From f0264f991eb579ce28833a12ba93afcc83d47a59 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 10 Dec 2019 15:09:50 -0600 Subject: [PATCH 2/5] negative --- pandas/tests/arrays/test_integer.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 7b88c9697321b..d36b42ec87e51 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -339,11 +339,17 @@ def test_error(self, data, all_arithmetic_operators): with pytest.raises(NotImplementedError): opa(np.arange(len(s)).reshape(-1, len(s))) - def test_divide_by_zero(self): + @pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)]) + def test_divide_by_zero(self, zero, negative): # https://github.com/pandas-dev/pandas/issues/27398 a = pd.array([0, 1, -1, None], dtype="Int64") - result = a / 0 + result = a / zero expected = np.array([np.nan, np.inf, -np.inf, np.nan]) + if negative: + values = [np.nan, -np.inf, np.inf, np.nan] + else: + values = [np.nan, np.inf, -np.inf, np.nan] + expected = np.array(values) tm.assert_numpy_array_equal(result, expected) def test_pow(self): From 9e5a69c3a32aad746ef3df29e43df352f4551f4d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 10 Dec 2019 13:51:20 -0600 Subject: [PATCH 3/5] BUG: Fixed IntegerArray pow special cases x^0 == 1 1^x == 1 --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/arrays/integer.py | 25 +++++++++++----- pandas/tests/arrays/test_integer.py | 45 +++++++++++++++++++++++++---- 3 files changed, 59 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 2ec575ef5040a..65824c009ff3c 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -859,6 +859,7 @@ Other - Fix :class:`AbstractHolidayCalendar` to return correct results for years after 2030 (now goes up to 2200) (:issue:`27790`) - Fixed :class:`IntegerArray` returning ``NA`` rather than ``inf`` for operations dividing by 0 (:issue:`27398`) +- Fixed ``pow`` operations for :class:`IntegerArray` when the other value is ``0`` or ``1`` (:issue:`29997`) - Bug in :meth:`Series.count` raises if use_inf_as_na is enabled (:issue:`29478`) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index d47e7e3df27e1..3469f782c733a 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -718,13 +718,13 @@ def _create_arithmetic_method(cls, op): @unpack_zerodim_and_defer(op.__name__) def integer_arithmetic_method(self, other): - mask = None + omask = None if getattr(other, "ndim", 0) > 1: raise NotImplementedError("can only perform ops with 1-d structures") if isinstance(other, IntegerArray): - other, mask = other._data, other._mask + other, omask = other._data, other._mask elif is_list_like(other): other = np.asarray(other) @@ -742,17 +742,28 @@ def integer_arithmetic_method(self, other): raise TypeError("can only perform ops with numeric values") # nans propagate - if mask is None: + if omask is None: mask = self._mask.copy() else: - mask = self._mask | mask + mask = self._mask | omask - # 1 ** np.nan is 1. So we have to unmask those. if op_name == "pow": - mask = np.where(self == 1, False, mask) + # 1 ** x is 1. + mask = np.where((self._data == 1) & ~self._mask, False, mask) + # x ** 0 is 1. + if omask is not None: + mask = np.where((other == 0) & ~omask, False, mask) + else: + mask = np.where(other == 0, False, mask) elif op_name == "rpow": - mask = np.where(other == 1, False, mask) + # 1 ** x is 1. + if omask is not None: + mask = np.where((other == 1) & ~omask, False, mask) + else: + mask = np.where(other == 1, False, mask) + # x ** 0 is 1. + mask = np.where((self._data == 0) & ~self._mask, False, mask) with np.errstate(all="ignore"): result = op(self._data, other) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index d36b42ec87e51..f0a168381a1e2 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -352,16 +352,51 @@ def test_divide_by_zero(self, zero, negative): expected = np.array(values) tm.assert_numpy_array_equal(result, expected) - def test_pow(self): - # https://github.com/pandas-dev/pandas/issues/22022 - a = integer_array([1, np.nan, np.nan, 1]) - b = integer_array([1, np.nan, 1, np.nan]) + def test_pow_scalar(self): + a = pd.array([0, 1, None, 2], dtype="Int64") + result = a ** 0 + expected = pd.array([1, 1, 1, 1], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = a ** 1 + expected = pd.array([0, 1, None, 2], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + # result = a ** pd.NA + # expected = pd.array([None, 1, None, None], dtype="Int64") + # tm.assert_extension_array_equal(result, expected) + + result = a ** np.nan + expected = np.array([np.nan, 1, np.nan, np.nan], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + # reversed + result = 0 ** a + expected = pd.array([1, 0, None, 0], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = 1 ** a + expected = pd.array([1, 1, 1, 1], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + # result = pd.NA ** a + # expected = pd.array([1, None, None, None], dtype="Int64") + # tm.assert_extension_array_equal(result, expected) + + result = np.nan ** a + expected = np.array([1, np.nan, np.nan, np.nan], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + def test_pow_array(self): + a = integer_array([0, 0, 0, 1, 1, 1, None, None, None]) + b = integer_array([0, 1, None, 0, 1, None, 0, 1, None]) result = a ** b - expected = pd.core.arrays.integer_array([1, np.nan, np.nan, 1]) + expected = integer_array([1, 0, None, 1, 1, 1, 1, None, None]) tm.assert_extension_array_equal(result, expected) def test_rpow_one_to_na(self): # https://github.com/pandas-dev/pandas/issues/22022 + # https://github.com/pandas-dev/pandas/issues/29997 arr = integer_array([np.nan, np.nan]) result = np.array([1.0, 2.0]) ** arr expected = np.array([1.0, np.nan]) From 8cb89e166fe4730a48ea59141bfa326f8b28d9ac Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 11 Dec 2019 14:56:19 -0600 Subject: [PATCH 4/5] simplify --- pandas/tests/arrays/test_integer.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index f0a168381a1e2..7bb0b065df1da 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -346,10 +346,7 @@ def test_divide_by_zero(self, zero, negative): result = a / zero expected = np.array([np.nan, np.inf, -np.inf, np.nan]) if negative: - values = [np.nan, -np.inf, np.inf, np.nan] - else: - values = [np.nan, np.inf, -np.inf, np.nan] - expected = np.array(values) + expected *= -1 tm.assert_numpy_array_equal(result, expected) def test_pow_scalar(self): From a96e6d6b91bdc0a2fc4a26cfb9596dfec42669da Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 12 Dec 2019 08:09:21 -0600 Subject: [PATCH 5/5] Update doc/source/whatsnew/v1.0.0.rst Co-Authored-By: Joris Van den Bossche --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 355b8902ceef3..ee87c34d6837f 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -860,7 +860,7 @@ Other - Bug in :meth:`DataFrame.append` that raised ``IndexError`` when appending with empty list (:issue:`28769`) - Fix :class:`AbstractHolidayCalendar` to return correct results for years after 2030 (now goes up to 2200) (:issue:`27790`) -- Fixed :class:`IntegerArray` returning ``inf`` rather than ``NA`` for operations dividing by 0 (:issue:`27398`) +- Fixed :class:`IntegerArray` returning ``inf`` rather than ``NaN`` for operations dividing by 0 (:issue:`27398`) - Fixed ``pow`` operations for :class:`IntegerArray` when the other value is ``0`` or ``1`` (:issue:`29997`) - Bug in :meth:`Series.count` raises if use_inf_as_na is enabled (:issue:`29478`)