Skip to content

Allow fillna(value=None, method="constant") #28124

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1813,8 +1813,6 @@ def fillna(self, value=None, method=None, limit=None):
value, method, validate_scalar_dict_value=False
)

if value is None:
value = np.nan
if limit is not None:
raise NotImplementedError(
"specifying a limit for fillna has not been implemented yet"
Expand Down
13 changes: 8 additions & 5 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,12 +560,15 @@ def fillna(self, value=None, method=None, limit=None):
)
raise TypeError(msg)

value = getattr(value, "_values", value)
self._check_closed_matches(value, name="value")
if value is not None:
value = getattr(value, "_values", value)
self._check_closed_matches(value, name="value")

left = self.left.fillna(value=value.left)
right = self.right.fillna(value=value.right)
return self._shallow_copy(left, right)
left = self.left.fillna(value=value.left)
right = self.right.fillna(value=value.right)
return self._shallow_copy(left, right)
else:
return self

@property
def dtype(self):
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/arrays/sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -955,20 +955,20 @@ def fillna(self, value=None, method=None, limit=None):
When ``self.fill_value`` is not NA, the result dtype will be
``self.dtype``. Again, this preserves the amount of memory used.
"""
if (method is None and value is None) or (
method is not None and value is not None
if (method is None and value in (None, "constant")) or (
method is not None and value not in (None, "constant")
):
raise ValueError("Must specify one of 'method' or 'value'.")

elif method is not None:
elif method not in (None, "constant"):
msg = "fillna with 'method' requires high memory usage."
warnings.warn(msg, PerformanceWarning)
filled = interpolate_2d(np.asarray(self), method=method, limit=limit)
return type(self)(filled, fill_value=self.fill_value)

else:
new_values = np.where(isna(self.sp_values), value, self.sp_values)

new_values = self.sp_values.copy()
new_values[isna(self.sp_values)] = value
if self._null_fill_value:
# This is essentially just updating the dtype.
new_dtype = SparseDtype(self.dtype.subtype, fill_value=value)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6194,7 +6194,7 @@ def fillna(
axis = 0
axis = self._get_axis_number(axis)

if value is None:
if value is None and method is not None:

if self._is_mixed_type and axis == 1:
if inplace:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ def _isnan(self):
@Appender(ibase._index_shared_docs["fillna"])
def fillna(self, value, downcast=None):
self._assert_can_do_op(value)
return CategoricalIndex(self._data.fillna(value), name=self.name)
return CategoricalIndex(self._data.fillna(value, method="constant"), name=self.name)

def argsort(self, *args, **kwargs):
return self.values.argsort(*args, **kwargs)
Expand Down
9 changes: 5 additions & 4 deletions pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def mask_missing(arr, values_to_mask):

def clean_fill_method(method, allow_nearest=False):
# asfreq is compat for resampling
if method in [None, "asfreq"]:
if method in [None, "asfreq", "constant"]:
return None

if isinstance(method, str):
Expand All @@ -84,16 +84,17 @@ def clean_fill_method(method, allow_nearest=False):
elif method == "bfill":
method = "backfill"

valid_methods = ["pad", "backfill"]
expecting = "pad (ffill) or backfill (bfill)"
valid_methods = ["pad", "backfill", "constant"]
expecting = "pad (ffill), backfill (bfill), or constant"
if allow_nearest:
valid_methods.append("nearest")
expecting = "pad (ffill), backfill (bfill) or nearest"
expecting = "pad (ffill), backfill (bfill), constant, or nearest"
if method not in valid_methods:
msg = "Invalid fill method. Expecting {expecting}. Got {method}".format(
expecting=expecting, method=method
)
raise ValueError(msg)

return method


Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/arrays/categorical/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,9 @@ def test_fillna_iterable_category(self, named):
expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)])

tm.assert_categorical_equal(result, expected)

def test_fillna_None(self):
    # Filling with a constant ``None`` is compared against a Categorical
    # built with ``None`` in place of every missing entry.
    source = Categorical([1, 2, 3, None, np.nan])
    tm.assert_categorical_equal(
        source.fillna(None, method="constant"),
        Categorical([1, 2, 3, None, None]),
    )
5 changes: 5 additions & 0 deletions pandas/tests/arrays/sparse/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -808,6 +808,11 @@ def test_fillna(self):
exp = SparseArray([-1, -1, -1, -1], fill_value=0, dtype=np.float64)
tm.assert_sp_array_equal(res, exp)

s = SparseArray([1, np.nan, np.nan, np.nan], fill_value=0)
res = s.fillna(None, method="constant")
exp = s
tm.assert_sp_array_equal(res, exp)

# float dtype's fill_value is np.nan, replaced by -1
s = SparseArray([0.0, 0.0, 0.0, 0.0])
res = s.fillna(-1)
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/frame/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,6 +721,12 @@ def test_fill_value_when_combine_const(self):
res = df.add(2, fill_value=0)
assert_frame_equal(res, exp)

def test_fillna_None(self):
    # Filling with a constant ``None`` should leave the missing entries
    # missing, so the result is compared against an independent copy of the
    # input rather than against the result itself (which could never fail).
    # NOTE(review): the "unchanged frame" expectation mirrors the sibling
    # Index tests for ``fillna(None)`` -- confirm it is the intended contract.
    df = DataFrame({"a": [1, 2, np.nan], "b": ["a", None, "c"]})
    exp = df.copy()
    res = df.fillna(None, method="constant")
    assert_frame_equal(res, exp)


class TestDataFrameInterpolate:
def test_interp_basic(self):
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/indexes/datetimes/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,21 @@ def test_fillna_datetime64(self, tz):
dtype=object,
)
tm.assert_index_equal(idx.fillna("x"), exp)

# fill None
idx = pd.DatetimeIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"])
exp = idx
tm.assert_index_equal(idx.fillna(None), exp)

# fill None on object
idx = pd.Index(
[
pd.Timestamp("2011-01-01 09:00", tz=tz),
None,
pd.Timestamp("2011-01-01 11:00", tz=tz),
"x",
],
dtype=object,
)
exp = idx
tm.assert_index_equal(idx.fillna(None), exp)
18 changes: 18 additions & 0 deletions pandas/tests/indexes/period/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,24 @@ def test_fillna_period(self):
)
tm.assert_index_equal(idx.fillna(pd.Period("2011-01-01", freq="D")), exp)

# fill None
idx = pd.PeriodIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"], freq="H")
exp = idx
tm.assert_index_equal(idx.fillna(None), exp)

# fill None on object
idx = pd.Index(
[
pd.Period("2011-01-01 09:00", freq="H"),
None,
pd.Period("2011-01-01 11:00", freq="H"),
"x",
],
dtype=object,
)
exp = idx
tm.assert_index_equal(idx.fillna(None), exp)

def test_no_millisecond_field(self):
msg = "type object 'DatetimeIndex' has no attribute 'millisecond'"
with pytest.raises(AttributeError, match=msg):
Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/indexes/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -985,6 +985,11 @@ def test_fillna_categorical(self):
with pytest.raises(ValueError, match=msg):
idx.fillna(2.0)

# fill by None
idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name="x")
exp = idx
tm.assert_index_equal(idx.fillna(None), exp)

def test_take_fill_value(self):
# GH 12631

Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/indexes/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,11 @@ def test_fillna_float64(self):
exp = Index([1.0, "obj", 3.0], name="x")
tm.assert_index_equal(idx.fillna("obj"), exp)

def test_fillna_None(self):
    # ``fillna(None)`` on a float Index is expected to compare equal to the
    # Index it was called on.
    float_idx = Index([1.0, np.nan, 3.0], dtype=float, name="x")
    tm.assert_index_equal(float_idx.fillna(None), float_idx)

def test_take_fill_value(self):
# GH 12631
idx = pd.Float64Index([1.0, 2.0, 3.0], name="xxx")
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/indexes/timedeltas/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,18 @@ def test_fillna_timedelta(self):
)
tm.assert_index_equal(idx.fillna("x"), exp)

# fill None
idx = pd.TimedeltaIndex(["1 day", pd.NaT, "3 day"])
exp = idx
tm.assert_index_equal(idx.fillna(None), exp)

# fill None on object
# Rebind ``idx`` (not ``exp``) so that the object-dtype index is the one
# actually passed through ``fillna``; otherwise the TimedeltaIndex from the
# previous case would be tested a second time.
idx = pd.Index(
    [pd.Timedelta("1 day"), None, pd.Timedelta("3 day")], dtype=object
)
exp = idx
tm.assert_index_equal(idx.fillna(None), exp)

@pytest.mark.parametrize("sort", [None, False])
def test_difference_freq(self, sort):
# GH14323: Difference of TimedeltaIndex should not preserve frequency
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/resample/test_resample_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ def test_fillna():

msg = (
r"Invalid fill method\. Expecting pad \(ffill\), backfill"
r" \(bfill\) or nearest\. Got 0"
r" \(bfill\), constant, or nearest\. Got 0"
)
with pytest.raises(ValueError, match=msg):
r.fillna(0)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/indexing/test_alter_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def test_reindex_corner(test_data):
ts = test_data.ts[::2]
msg = (
r"Invalid fill method\. Expecting pad \(ffill\), backfill"
r" \(bfill\) or nearest\. Got foo"
r" \(bfill\), constant, or nearest\. Got foo"
)
with pytest.raises(ValueError, match=msg):
ts.reindex(test_data.ts.index, method="foo")
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/series/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ def test_replace_with_single_list(self):
# make sure things don't get corrupted when fillna call fails
s = ser.copy()
msg = (
r"Invalid fill method\. Expecting pad \(ffill\) or backfill"
r" \(bfill\)\. Got crash_cymbal"
r"Invalid fill method\. Expecting pad \(ffill\), backfill"
r" \(bfill\), or constant\. Got crash_cymbal"
)
with pytest.raises(ValueError, match=msg):
s.replace([1, 2, 3], inplace=True, method="crash_cymbal")
Expand Down
9 changes: 5 additions & 4 deletions pandas/util/_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,17 +356,18 @@ def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True):

if value is None and method is None:
raise ValueError("Must specify a fill 'value' or 'method'.")
elif value is None and method is not None:
method = clean_fill_method(method)
elif value is None and method not in (None, "constant"):
pass

elif value is not None and method is None:
elif value is not None and method in (None, "constant"):
if validate_scalar_dict_value and isinstance(value, (list, tuple)):
raise TypeError(
'"value" parameter must be a scalar or dict, but '
'you passed a "{0}"'.format(type(value).__name__)
)

elif value is not None and method is not None:
elif value is not None and method not in (None, "constant"):
raise ValueError("Cannot specify both 'value' and 'method'.")

method = clean_fill_method(method)
return value, method