From bc9a9422eef9283bc05369f587ab2ebb1b3cb12e Mon Sep 17 00:00:00 2001 From: sanggon6107 Date: Thu, 23 Jan 2025 19:14:23 +0900 Subject: [PATCH 1/7] BUG: Add fillna so that cond doesnt contain NA at the beginning of _where. (#60729) --- pandas/core/generic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e0a4f9d9c546a..6a13b74d7c2f6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9674,6 +9674,12 @@ def _where( if axis is not None: axis = self._get_axis_number(axis) + # We should not be filling NA. See GH#60729 + if isinstance(cond, np.ndarray): + cond[np.isnan(cond)] = True + elif isinstance(cond, NDFrame): + cond = cond.fillna(True) + # align the cond to same shape as myself cond = common.apply_if_callable(cond, self) if isinstance(cond, NDFrame): From 558569f44245717b3c636117950782df324a7a96 Mon Sep 17 00:00:00 2001 From: sanggon6107 Date: Thu, 23 Jan 2025 19:31:00 +0900 Subject: [PATCH 2/7] TST: Add tests for mask with NA. (#60729) --- pandas/tests/series/indexing/test_mask.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/tests/series/indexing/test_mask.py b/pandas/tests/series/indexing/test_mask.py index 3c21cd0d5ca64..aeaf24ef3926b 100644 --- a/pandas/tests/series/indexing/test_mask.py +++ b/pandas/tests/series/indexing/test_mask.py @@ -67,3 +67,19 @@ def test_mask_inplace(): rs = s.copy() rs.mask(cond, -s, inplace=True) tm.assert_series_equal(rs, s.mask(cond, -s)) + + +def test_mask_na(): + # We should not be filling pd.NA. See GH#60729 + series = Series([None, 1, 2, None, 3, 4, None]) + series = series.convert_dtypes() + cond = series <= 2 + + maskres = series.mask(cond, -99) + whereres = series.where(~(cond), -99) + + expected = Series([None, -99, -99, None, 3, 4, None]) + expected = expected.convert_dtypes() + + tm.assert_series_equal(maskres, expected) + tm.assert_series_equal(maskres, whereres) From bbbc720c7fb4a6cfca974289bd386eb3e1c39e31 Mon Sep 17 00:00:00 2001 From: sanggon6107 Date: Thu, 23 Jan 2025 19:58:56 +0900 Subject: [PATCH 3/7] BUG: Fix _where to make np.ndarray mutable. (#60729) --- pandas/core/generic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6a13b74d7c2f6..61cbabf3198c8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9676,6 +9676,7 @@ def _where( # We should not be filling NA. See GH#60729 if isinstance(cond, np.ndarray): + cond = np.array(cond) cond[np.isnan(cond)] = True elif isinstance(cond, NDFrame): cond = cond.fillna(True) From e2f32cb51e49ad2e4f0d87af59ae755c9c78719d Mon Sep 17 00:00:00 2001 From: sanggon6107 Date: Thu, 23 Jan 2025 20:14:25 +0900 Subject: [PATCH 4/7] DOC: Add documentation regarding the bug (#60729) --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 1d8d0f6a74cb1..5e0281f59984b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -676,6 +676,7 @@ Interval Indexing ^^^^^^^^ +- Bug in :func:`Series.mask` unexpectedly filling pd.NA (:issue:`60729`) - Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`) - Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`) - Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`) From d2d5f62c120739a4b1c55fdb7bb3351d45d42337 Mon Sep 17 00:00:00 2001 From: sanggon6107 Date: Sat, 25 Jan 2025 17:10:57 +0900 Subject: [PATCH 5/7] ENH: Optimze test_mask_na() Co-authored-by: WillAyd --- pandas/tests/series/indexing/test_mask.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/pandas/tests/series/indexing/test_mask.py b/pandas/tests/series/indexing/test_mask.py index aeaf24ef3926b..3f2dcb8239099 100644 --- a/pandas/tests/series/indexing/test_mask.py +++ b/pandas/tests/series/indexing/test_mask.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas import Series +from pandas import ( + Int64Dtype, + Series, +) import pandas._testing as tm @@ -71,15 +74,8 @@ def test_mask_inplace(): def test_mask_na(): # We should not be filling pd.NA. See GH#60729 - series = Series([None, 1, 2, None, 3, 4, None]) - series = series.convert_dtypes() - cond = series <= 2 + series = Series([None, 1, 2, None, 3, 4, None], dtype=Int64Dtype()) + result = series.mask(series <= 2, -99) + expected = Series([None, 1, 2, None, -99, -99, None], dtype=Int64Dtype()) - maskres = series.mask(cond, -99) - whereres = series.where(~(cond), -99) - - expected = Series([None, -99, -99, None, 3, 4, None]) - expected = expected.convert_dtypes() - - tm.assert_series_equal(maskres, expected) - tm.assert_series_equal(maskres, whereres) + tm.assert_series_equal(result, expected) From 475f2d15ea61aaf7789785b764186944dbf16a9b Mon Sep 17 00:00:00 2001 From: sanggon6107 Date: Sat, 25 Jan 2025 18:24:13 +0900 Subject: [PATCH 6/7] BUG: Fix a bug in test_mask_na() (#60729) --- pandas/tests/series/indexing/test_mask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/indexing/test_mask.py b/pandas/tests/series/indexing/test_mask.py index 3f2dcb8239099..de1a3ff5b64e1 100644 --- a/pandas/tests/series/indexing/test_mask.py +++ b/pandas/tests/series/indexing/test_mask.py @@ -76,6 +76,6 @@ def test_mask_na(): # We should not be filling pd.NA. See GH#60729 series = Series([None, 1, 2, None, 3, 4, None], dtype=Int64Dtype()) result = series.mask(series <= 2, -99) - expected = Series([None, 1, 2, None, -99, -99, None], dtype=Int64Dtype()) + expected = Series([None, -99, -99, None, 3, 4, None], dtype=Int64Dtype()) tm.assert_series_equal(result, expected) From db30b581bf3bb2bbeb833b8920f93d7f7cd91299 Mon Sep 17 00:00:00 2001 From: SALCAN <68040183+sanggon6107@users.noreply.github.com> Date: Sun, 9 Feb 2025 12:34:02 +0900 Subject: [PATCH 7/7] Update doc/source/whatsnew/v3.0.0.rst Co-authored-by: Xiao Yuan --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 5e0281f59984b..62db1e993acd3 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -676,7 +676,7 @@ Interval Indexing ^^^^^^^^ -- Bug in :func:`Series.mask` unexpectedly filling pd.NA (:issue:`60729`) +- Bug in :meth:`Series.mask` unexpectedly filling ``pd.NA`` (:issue:`60729`) - Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`) - Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`) - Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`)