From 26a944881dfa319e805ca6f655ae616816bc962b Mon Sep 17 00:00:00 2001 From: valtron Date: Wed, 21 Aug 2019 21:54:16 -0600 Subject: [PATCH] Allow `fillna(value=None, method="constant")` --- pandas/core/arrays/categorical.py | 2 -- pandas/core/arrays/interval.py | 13 ++++++++----- pandas/core/arrays/sparse.py | 10 +++++----- pandas/core/generic.py | 2 +- pandas/core/indexes/category.py | 2 +- pandas/core/missing.py | 9 +++++---- .../tests/arrays/categorical/test_missing.py | 6 ++++++ pandas/tests/arrays/sparse/test_array.py | 5 +++++ pandas/tests/frame/test_missing.py | 6 ++++++ pandas/tests/indexes/datetimes/test_missing.py | 18 ++++++++++++++++++ pandas/tests/indexes/period/test_period.py | 18 ++++++++++++++++++ pandas/tests/indexes/test_category.py | 5 +++++ pandas/tests/indexes/test_numeric.py | 5 +++++ .../tests/indexes/timedeltas/test_timedelta.py | 12 ++++++++++++ pandas/tests/resample/test_resample_api.py | 2 +- .../tests/series/indexing/test_alter_index.py | 2 +- pandas/tests/series/test_replace.py | 4 ++-- pandas/util/_validators.py | 9 +++++---- 18 files changed, 104 insertions(+), 26 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index a895da6184eeb..a70e83274b4a8 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1813,8 +1813,6 @@ def fillna(self, value=None, method=None, limit=None): value, method, validate_scalar_dict_value=False ) - if value is None: - value = np.nan if limit is not None: raise NotImplementedError( "specifying a limit for fillna has not been implemented yet" diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 7a14d6f1b619a..896093cf8a4a5 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -560,12 +560,15 @@ def fillna(self, value=None, method=None, limit=None): ) raise TypeError(msg) - value = getattr(value, "_values", value) - self._check_closed_matches(value, name="value") + if 
value is not None: + value = getattr(value, "_values", value) + self._check_closed_matches(value, name="value") - left = self.left.fillna(value=value.left) - right = self.right.fillna(value=value.right) - return self._shallow_copy(left, right) + left = self.left.fillna(value=value.left) + right = self.right.fillna(value=value.right) + return self._shallow_copy(left, right) + else: + return self @property def dtype(self): diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 201174b6b1995..c2e5ac2641f6f 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -955,20 +955,20 @@ def fillna(self, value=None, method=None, limit=None): When ``self.fill_value`` is not NA, the result dtype will be ``self.dtype``. Again, this preserves the amount of memory used. """ - if (method is None and value is None) or ( - method is not None and value is not None + if (method is None and value is None) or ( + method not in (None, "constant") and value is not None ): raise ValueError("Must specify one of 'method' or 'value'.") - elif method is not None: + elif method not in (None, "constant"): msg = "fillna with 'method' requires high memory usage." warnings.warn(msg, PerformanceWarning) filled = interpolate_2d(np.asarray(self), method=method, limit=limit) return type(self)(filled, fill_value=self.fill_value) else: - new_values = np.where(isna(self.sp_values), value, self.sp_values) - + new_values = self.sp_values.copy() + new_values[isna(self.sp_values)] = value if self._null_fill_value: # This is essentially just updating the dtype.
new_dtype = SparseDtype(self.dtype.subtype, fill_value=value) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ba1c516b9b444..d3ff91dd505cf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6194,7 +6194,7 @@ def fillna( axis = 0 axis = self._get_axis_number(axis) - if value is None: + if value is None and method is not None: if self._is_mixed_type and axis == 1: if inplace: diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 82806c7351db6..bedb077c29c5b 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -439,7 +439,7 @@ def _isnan(self): @Appender(ibase._index_shared_docs["fillna"]) def fillna(self, value, downcast=None): self._assert_can_do_op(value) - return CategoricalIndex(self._data.fillna(value), name=self.name) + return CategoricalIndex(self._data.fillna(value, method="constant"), name=self.name) def argsort(self, *args, **kwargs): return self.values.argsort(*args, **kwargs) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index bc81fbb7e1ce0..69bcb05880715 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -74,7 +74,7 @@ def mask_missing(arr, values_to_mask): def clean_fill_method(method, allow_nearest=False): # asfreq is compat for resampling - if method in [None, "asfreq"]: + if method in [None, "asfreq", "constant"]: return None if isinstance(method, str): @@ -84,16 +84,17 @@ def clean_fill_method(method, allow_nearest=False): elif method == "bfill": method = "backfill" - valid_methods = ["pad", "backfill"] - expecting = "pad (ffill) or backfill (bfill)" + valid_methods = ["pad", "backfill", "constant"] + expecting = "pad (ffill), backfill (bfill), or constant" if allow_nearest: valid_methods.append("nearest") - expecting = "pad (ffill), backfill (bfill) or nearest" + expecting = "pad (ffill), backfill (bfill), constant, or nearest" if method not in valid_methods: msg = "Invalid fill method. Expecting {expecting}. 
Got {method}".format( expecting=expecting, method=method ) raise ValueError(msg) + return method diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py index 3037ac79cd592..be5d1f7ab0e86 100644 --- a/pandas/tests/arrays/categorical/test_missing.py +++ b/pandas/tests/arrays/categorical/test_missing.py @@ -82,3 +82,9 @@ def test_fillna_iterable_category(self, named): expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)]) tm.assert_categorical_equal(result, expected) + + def test_fillna_None(self): + cat = Categorical([1, 2, 3, None, np.nan]) + result = cat.fillna(None, method="constant") + expected = Categorical([1, 2, 3, None, None]) + tm.assert_categorical_equal(result, expected) diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index b94e2a16d217a..8dec8e0a98a59 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -808,6 +808,11 @@ def test_fillna(self): exp = SparseArray([-1, -1, -1, -1], fill_value=0, dtype=np.float64) tm.assert_sp_array_equal(res, exp) + s = SparseArray([1, np.nan, np.nan, np.nan], fill_value=0) + res = s.fillna(None, method="constant") + exp = s + tm.assert_sp_array_equal(res, exp) + # float dtype's fill_value is np.nan, replaced by -1 s = SparseArray([0.0, 0.0, 0.0, 0.0]) res = s.fillna(-1) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 94667ecfa837d..46657cf41b11a 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -721,6 +721,12 @@ def test_fill_value_when_combine_const(self): res = df.add(2, fill_value=0) assert_frame_equal(res, exp) + def test_fillna_None(self): + df = DataFrame({"a": [1, 2, np.nan], "b": ["a", None, "c"]}) + res = df.fillna(None, method="constant") + exp = df + assert_frame_equal(res, exp) + class TestDataFrameInterpolate: def test_interp_basic(self): diff --git
a/pandas/tests/indexes/datetimes/test_missing.py b/pandas/tests/indexes/datetimes/test_missing.py index 6d94319b33b02..1fcb9d0c2508b 100644 --- a/pandas/tests/indexes/datetimes/test_missing.py +++ b/pandas/tests/indexes/datetimes/test_missing.py @@ -60,3 +60,21 @@ def test_fillna_datetime64(self, tz): dtype=object, ) tm.assert_index_equal(idx.fillna("x"), exp) + + # fill None + idx = pd.DatetimeIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"]) + exp = idx + tm.assert_index_equal(idx.fillna(None), exp) + + # fill None on object + idx = pd.Index( + [ + pd.Timestamp("2011-01-01 09:00", tz=tz), + None, + pd.Timestamp("2011-01-01 11:00", tz=tz), + "x", + ], + dtype=object, + ) + exp = idx + tm.assert_index_equal(idx.fillna(None), exp) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 8b3b66bd1ee6b..5125362713030 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -93,6 +93,24 @@ def test_fillna_period(self): ) tm.assert_index_equal(idx.fillna(pd.Period("2011-01-01", freq="D")), exp) + # fill None + idx = pd.PeriodIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"], freq="H") + exp = idx + tm.assert_index_equal(idx.fillna(None), exp) + + # fill None on object + idx = pd.Index( + [ + pd.Period("2011-01-01 09:00", freq="H"), + None, + pd.Period("2011-01-01 11:00", freq="H"), + "x", + ], + dtype=object, + ) + exp = idx + tm.assert_index_equal(idx.fillna(None), exp) + def test_no_millisecond_field(self): msg = "type object 'DatetimeIndex' has no attribute 'millisecond'" with pytest.raises(AttributeError, match=msg): diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 67bf9bd20e716..9c714c8aa3877 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -985,6 +985,11 @@ def test_fillna_categorical(self): with pytest.raises(ValueError, match=msg): idx.fillna(2.0) + # fill 
by None + idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name="x") + exp = idx + tm.assert_index_equal(idx.fillna(None), exp) + def test_take_fill_value(self): # GH 12631 diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index f246307e63e3b..13853c20fa5ee 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -401,6 +401,11 @@ def test_fillna_float64(self): exp = Index([1.0, "obj", 3.0], name="x") tm.assert_index_equal(idx.fillna("obj"), exp) + def test_fillna_None(self): + idx = Index([1.0, np.nan, 3.0], dtype=float, name="x") + exp = idx + tm.assert_index_equal(idx.fillna(None), exp) + def test_take_fill_value(self): # GH 12631 idx = pd.Float64Index([1.0, 2.0, 3.0], name="xxx") diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index e790a913fcac2..247b9d3ed1241 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -63,6 +63,18 @@ def test_fillna_timedelta(self): ) tm.assert_index_equal(idx.fillna("x"), exp) + # fill None + idx = pd.TimedeltaIndex(["1 day", pd.NaT, "3 day"]) + exp = idx + tm.assert_index_equal(idx.fillna(None), exp) + + # fill None on object + idx = pd.Index( + [pd.Timedelta("1 day"), None, pd.Timedelta("3 day")], dtype=object + ) + exp = idx + tm.assert_index_equal(idx.fillna(None), exp) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_freq(self, sort): # GH14323: Difference of TimedeltaIndex should not preserve frequency diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 94bc884d66835..8504988d1804d 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -219,7 +219,7 @@ def test_fillna(): msg = ( r"Invalid fill method\. Expecting pad \(ffill\), backfill" - r" \(bfill\) or nearest\.
Got 0" + r" \(bfill\), constant, or nearest\. Got 0" ) with pytest.raises(ValueError, match=msg): r.fillna(0) diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py index c93a000f5e7ce..522680d3927c2 100644 --- a/pandas/tests/series/indexing/test_alter_index.py +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -245,7 +245,7 @@ def test_reindex_corner(test_data): ts = test_data.ts[::2] msg = ( r"Invalid fill method\. Expecting pad \(ffill\), backfill" - r" \(bfill\) or nearest\. Got foo" + r" \(bfill\), constant, or nearest\. Got foo" ) with pytest.raises(ValueError, match=msg): ts.reindex(test_data.ts.index, method="foo") diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index 06a859963cf93..28a1730e02ca8 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -122,8 +122,8 @@ def test_replace_with_single_list(self): # make sure things don't get corrupted when fillna call fails s = ser.copy() msg = ( - r"Invalid fill method\. Expecting pad \(ffill\) or backfill" - r" \(bfill\)\. Got crash_cymbal" + r"Invalid fill method\. Expecting pad \(ffill\), backfill" + r" \(bfill\), or constant\. 
Got crash_cymbal" ) with pytest.raises(ValueError, match=msg): s.replace([1, 2, 3], inplace=True, method="crash_cymbal") diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 8d5f9f7749682..be8a23037e63d 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -356,17 +356,18 @@ def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True): if value is None and method is None: raise ValueError("Must specify a fill 'value' or 'method'.") - elif value is None and method is not None: - method = clean_fill_method(method) + elif value is None and method not in (None, "constant"): + pass - elif value is not None and method is None: + elif value is not None and method in (None, "constant"): if validate_scalar_dict_value and isinstance(value, (list, tuple)): raise TypeError( '"value" parameter must be a scalar or dict, but ' 'you passed a "{0}"'.format(type(value).__name__) ) - elif value is not None and method is not None: + elif value is not None and method not in (None, "constant"): raise ValueError("Cannot specify both 'value' and 'method'.") + method = clean_fill_method(method) return value, method