-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Allow interpolate() to fill backwards as well as forwards #10691
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -329,6 +329,10 @@ Interpolation | |
:meth:`~pandas.DataFrame.interpolate`, and :meth:`~pandas.Series.interpolate` have | ||
revamped interpolation methods and functionality. | ||
|
||
.. versionadded:: 0.17.0 | ||
|
||
The ``limit_direction`` keyword argument was added. | ||
|
||
Both Series and DataFrame objects have an ``interpolate`` method that, by default, | ||
performs linear interpolation at missing datapoints. | ||
|
||
|
@@ -448,17 +452,33 @@ at the new values. | |
.. _documentation: http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation | ||
.. _guide: http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html | ||
|
||
Interpolation Limits | ||
^^^^^^^^^^^^^^^^^^^^ | ||
|
||
Like other pandas fill methods, ``interpolate`` accepts a ``limit`` keyword | ||
argument. Use this to limit the number of consecutive interpolations, keeping | ||
``NaN`` values for interpolations that are too far from the last valid | ||
argument. Use this argument to limit the number of consecutive interpolations, | ||
keeping ``NaN`` values for interpolations that are too far from the last valid | ||
observation: | ||
|
||
.. ipython:: python | ||
|
||
ser = pd.Series([1, 3, np.nan, np.nan, np.nan, 11]) | ||
ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, np.nan, 13]) | ||
ser.interpolate(limit=2) | ||
|
||
By default, ``limit`` applies in a forward direction, so that only ``NaN`` | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. would make a sub-section (under interpolation). |
||
values after a non-``NaN`` value can be filled. If you provide ``'backward'`` or | ||
``'both'`` for the ``limit_direction`` keyword argument, you can fill ``NaN`` | ||
values before non-``NaN`` values, or both before and after non-``NaN`` values, | ||
respectively: | ||
|
||
.. ipython:: python | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. show |
||
ser.interpolate(limit=1) # limit_direction == 'forward' | ||
|
||
ser.interpolate(limit=1, limit_direction='backward') | ||
|
||
ser.interpolate(limit=1, limit_direction='both') | ||
|
||
.. _missing_data.replace: | ||
|
||
Replacing Generic Values | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2964,7 +2964,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, | |
return self._constructor(new_data).__finalize__(self) | ||
|
||
def interpolate(self, method='linear', axis=0, limit=None, inplace=False, | ||
downcast=None, **kwargs): | ||
limit_direction='forward', downcast=None, **kwargs): | ||
""" | ||
Interpolate values according to different methods. | ||
|
||
|
@@ -3001,6 +3001,12 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, | |
* 1: fill row-by-row | ||
limit : int, default None. | ||
Maximum number of consecutive NaNs to fill. | ||
limit_direction : {'forward', 'backward', 'both'}, defaults to 'forward' | ||
If limit is specified, consecutive NaNs will be filled in this | ||
direction. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add here |
||
|
||
.. versionadded:: 0.17.0 | ||
|
||
inplace : bool, default False | ||
Update the NDFrame in place if possible. | ||
downcast : optional, 'infer' or None, defaults to None | ||
|
@@ -3071,6 +3077,7 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, | |
index=index, | ||
values=_maybe_transposed_self, | ||
limit=limit, | ||
limit_direction=limit_direction, | ||
inplace=inplace, | ||
downcast=downcast, | ||
**kwargs | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -857,10 +857,79 @@ def test_interp_scipy_basic(self): | |
|
||
def test_interp_limit(self): | ||
s = Series([1, 3, np.nan, np.nan, np.nan, 11]) | ||
|
||
expected = Series([1., 3., 5., 7., np.nan, 11.]) | ||
result = s.interpolate(method='linear', limit=2) | ||
assert_series_equal(result, expected) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. add the issue number as a comment |
||
def test_interp_limit_forward(self): | ||
s = Series([1, 3, np.nan, np.nan, np.nan, 11]) | ||
|
||
# Provide 'forward' (the default) explicitly here. | ||
expected = Series([1., 3., 5., 7., np.nan, 11.]) | ||
|
||
result = s.interpolate( | ||
method='linear', limit=2, limit_direction='forward') | ||
assert_series_equal(result, expected) | ||
|
||
result = s.interpolate( | ||
method='linear', limit=2, limit_direction='FORWARD') | ||
assert_series_equal(result, expected) | ||
|
||
def test_interp_limit_bad_direction(self): | ||
s = Series([1, 3, np.nan, np.nan, np.nan, 11]) | ||
expected = Series([1., 3., 5., 7., 9., 11.]) | ||
|
||
self.assertRaises(ValueError, s.interpolate, | ||
method='linear', limit=2, | ||
limit_direction='abc') | ||
|
||
# raises an error even if no limit is specified. | ||
self.assertRaises(ValueError, s.interpolate, | ||
method='linear', | ||
limit_direction='abc') | ||
|
||
def test_interp_limit_direction(self): | ||
# These tests are for issue #9218 -- fill NaNs in both directions. | ||
s = Series([1, 3, np.nan, np.nan, np.nan, 11]) | ||
|
||
expected = Series([1., 3., np.nan, 7., 9., 11.]) | ||
result = s.interpolate( | ||
method='linear', limit=2, limit_direction='backward') | ||
assert_series_equal(result, expected) | ||
|
||
expected = Series([1., 3., 5., np.nan, 9., 11.]) | ||
result = s.interpolate( | ||
method='linear', limit=1, limit_direction='both') | ||
assert_series_equal(result, expected) | ||
|
||
# Check that this works on a longer series of nans. | ||
s = Series([1, 3, np.nan, np.nan, np.nan, 7, 9, np.nan, np.nan, 12, np.nan]) | ||
|
||
expected = Series([1., 3., 4., 5., 6., 7., 9., 10., 11., 12., 12.]) | ||
result = s.interpolate( | ||
method='linear', limit=2, limit_direction='both') | ||
assert_series_equal(result, expected) | ||
|
||
expected = Series([1., 3., 4., np.nan, 6., 7., 9., 10., 11., 12., 12.]) | ||
result = s.interpolate( | ||
method='linear', limit=1, limit_direction='both') | ||
assert_series_equal(result, expected) | ||
|
||
def test_interp_limit_to_ends(self): | ||
# These tests are for issue #10420 -- flow back to beginning. | ||
s = Series([np.nan, np.nan, 5, 7, 9, np.nan]) | ||
|
||
expected = Series([5., 5., 5., 7., 9., np.nan]) | ||
result = s.interpolate( | ||
method='linear', limit=2, limit_direction='backward') | ||
assert_series_equal(result, expected) | ||
|
||
expected = Series([5., 5., 5., 7., 9., 9.]) | ||
result = s.interpolate( | ||
method='linear', limit=2, limit_direction='both') | ||
assert_series_equal(result, expected) | ||
|
||
def test_interp_all_good(self): | ||
# scipy | ||
tm._skip_if_no_scipy() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is fine