From d138a576f3d4e35a42bdbc7d74cb16ed02af6779 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Tue, 11 Feb 2020 07:57:40 -0500 Subject: [PATCH 1/9] Allow subtraction of a generic Index of cftime.datetimes from a CFTimeIndex --- xarray/coding/cftimeindex.py | 18 +++++++++++----- xarray/tests/test_cftimeindex.py | 35 +++++++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 8b440812ca9..5ad0e416a66 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -49,6 +49,7 @@ from xarray.core.utils import is_scalar +from ..core.common import _contains_cftime_datetimes from .times import _STANDARD_CALENDARS, cftime_to_nptime, infer_calendar_name @@ -429,7 +430,11 @@ def __radd__(self, other): def __sub__(self, other): import cftime - if isinstance(other, (CFTimeIndex, cftime.datetime)): + if _contains_datetime_timedeltas(other): + return CFTimeIndex(np.array(self) - other) + elif isinstance(other, pd.TimedeltaIndex): + return CFTimeIndex(np.array(self) - other.to_pytimedelta()) + elif isinstance(other, cftime.datetime) or _contains_cftime_datetimes(other): try: return pd.TimedeltaIndex(np.array(self) - np.array(other)) except OverflowError: @@ -437,11 +442,8 @@ def __sub__(self, other): "The time difference exceeds the range of values " "that can be expressed at the nanosecond resolution." ) - - elif isinstance(other, pd.TimedeltaIndex): - return CFTimeIndex(np.array(self) - other.to_pytimedelta()) else: - return CFTimeIndex(np.array(self) - other) + return NotImplemented def __rsub__(self, other): return pd.TimedeltaIndex(other - np.array(self)) @@ -554,3 +556,9 @@ def _parse_array_of_cftime_strings(strings, date_type): return np.array( [_parse_iso8601_without_reso(date_type, s) for s in strings.ravel()] ).reshape(strings.shape) + + +def _contains_datetime_timedeltas(array): + """Check if an input array contains datetime.timedelta objects.""" + array = np.atleast_1d(array) + return isinstance(array[0], timedelta) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index a8ee3c97042..5ec4e682f80 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -725,7 +725,7 @@ def test_timedeltaindex_add_cftimeindex(calendar): @requires_cftime -def test_cftimeindex_sub(index): +def test_cftimeindex_sub_timedelta(index): date_type = index.date_type expected_dates = [ date_type(1, 1, 2), @@ -740,6 +740,28 @@ def test_cftimeindex_sub(index): assert isinstance(result, CFTimeIndex) +@requires_cftime +@pytest.mark.parametrize( + "other", + [np.array(4 * [timedelta(days=1)]), + np.array(timedelta(days=1))], + ids=["1d-array", "scalar-array"] +) +def test_cftimeindex_sub_timedelta_array(index, other): + date_type = index.date_type + expected_dates = [ + date_type(1, 1, 2), + date_type(1, 2, 2), + date_type(2, 1, 2), + date_type(2, 2, 2), + ] + expected = CFTimeIndex(expected_dates) + result = index + timedelta(days=2) + result = result - other + assert result.equals(expected) + assert isinstance(result, CFTimeIndex) + + @requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) def test_cftimeindex_sub_cftimeindex(calendar): @@ -782,6 +804,17 @@ def test_cftimeindex_sub_timedeltaindex(calendar): assert isinstance(result, CFTimeIndex) +@requires_cftime +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) +def test_cftimeindex_sub_index_of_cftime_datetimes(calendar): + a = xr.cftime_range("2000", periods=5, calendar=calendar) + b = pd.Index(a.values) + expected = a - a + result = a - b + assert result.equals(expected) + assert isinstance(result, pd.TimedeltaIndex) + + @requires_cftime def test_cftimeindex_rsub(index): with pytest.raises(TypeError): From bd92d06ed0b6c1f80feb46d96a580f0e4d951b72 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Tue, 11 Feb 2020 08:09:16 -0500 Subject: [PATCH 2/9] black --- xarray/tests/test_cftimeindex.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 5ec4e682f80..31c54699fde 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -743,9 +743,8 @@ def test_cftimeindex_sub_timedelta(index): @requires_cftime @pytest.mark.parametrize( "other", - [np.array(4 * [timedelta(days=1)]), - np.array(timedelta(days=1))], - ids=["1d-array", "scalar-array"] + [np.array(4 * [timedelta(days=1)]), np.array(timedelta(days=1))], + ids=["1d-array", "scalar-array"], ) def test_cftimeindex_sub_timedelta_array(index, other): date_type = index.date_type From 9fddded5920f57bc219f2721ab253e953adb2839 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Tue, 11 Feb 2020 08:20:24 -0500 Subject: [PATCH 3/9] Test that NotImplemented logic works --- xarray/coding/cftimeindex.py | 4 +--- xarray/tests/test_cftimeindex.py | 8 ++++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 5ad0e416a66..74fde83dbce 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -428,13 +428,11 @@ def __radd__(self, other): return CFTimeIndex(other + np.array(self)) def __sub__(self, other): - import cftime - if _contains_datetime_timedeltas(other): return CFTimeIndex(np.array(self) - other) elif isinstance(other, pd.TimedeltaIndex): return CFTimeIndex(np.array(self) - other.to_pytimedelta()) - elif isinstance(other, cftime.datetime) or _contains_cftime_datetimes(other): + elif _contains_cftime_datetimes(np.array(other)): try: return pd.TimedeltaIndex(np.array(self) - np.array(other)) except OverflowError: diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 31c54699fde..1f62e0e0bc6 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -814,6 +814,14 @@ def test_cftimeindex_sub_index_of_cftime_datetimes(calendar): assert isinstance(result, pd.TimedeltaIndex) +@requires_cftime +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) +def test_cftimeindex_sub_not_implemented(calendar): + a = xr.cftime_range("2000", periods=5, calendar=calendar) + with pytest.raises(TypeError, match="unsupported operand"): + a - 1 + + @requires_cftime def test_cftimeindex_rsub(index): with pytest.raises(TypeError): From 44809220aea24ca6537a3c4e381e297e005316d9 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 22 Feb 2020 06:49:05 -0500 Subject: [PATCH 4/9] Vendor _get_nearest_indexer and _filter_indexer_tolerance --- xarray/coding/cftimeindex.py | 29 ++++++++++++++++++++++++++++- xarray/tests/test_cftimeindex.py | 10 ++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 74fde83dbce..6c19d2b87a4 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -327,6 +327,29 @@ def _get_string_slice(self, key): raise KeyError(key) return loc + def _get_nearest_indexer(self, target, limit, tolerance): + """Adapted from pandas.Index._get_nearest_indexer""" + left_indexer = self.get_indexer(target, "pad", limit=limit) + right_indexer = self.get_indexer(target, "backfill", limit=limit) + left_distances = abs(self.values[left_indexer] - target.values) + right_distances = abs(self.values[right_indexer] - target.values) + + if self.is_monotonic_increasing: + condition = (left_distances < right_distances) | (right_indexer == -1) + else: + condition = (left_distances <= right_distances) | (right_indexer == -1) + indexer = np.where(condition, left_indexer, right_indexer) + + if tolerance is not None: + indexer = self._filter_indexer_tolerance(target, indexer, tolerance) + return indexer + + def _filter_indexer_tolerance(self, target, indexer, tolerance): + """Copied from pandas.Index._get_filter_tolerance""" + distance = abs(self.values[indexer] - target) + indexer = np.where(distance <= tolerance, indexer, -1) + return indexer + def get_loc(self, key, method=None, tolerance=None): """Adapted from pandas.tseries.index.DatetimeIndex.get_loc""" if isinstance(key, str): @@ -444,7 +467,11 @@ def __sub__(self, other): return NotImplemented def __rsub__(self, other): - return pd.TimedeltaIndex(other - np.array(self)) + try: + return pd.TimedeltaIndex(other - np.array(self)) + except OverflowError: + # TODO: add a warning here + return pd.Index(np.array(other) - np.array(self), dtype="O") def to_datetimeindex(self, unsafe=False): """If possible, convert this index to a pandas.DatetimeIndex. diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 1f62e0e0bc6..e1dc5f7b1a0 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -450,6 +450,13 @@ def test_sel_date_scalar(da, date_type, index): assert_identical(result, expected) +@requires_cftime +def test_sel_date_distant_date(da, date_type, index): + expected = xr.DataArray(4).assign_coords(time=index[3]) + result = da.sel(time=date_type(2000, 1, 1), method="nearest") + assert_identical(result, expected) + + @requires_cftime @pytest.mark.parametrize( "sel_kwargs", @@ -591,6 +598,9 @@ def range_args(date_type): ] +@pytest.mark.xfail( + reason="https://github.com/pydata/xarray/issues/3751#issuecomment-587438020" +) @requires_cftime def test_indexing_in_series_getitem(series, index, scalar_args, range_args): for arg in scalar_args: From 08f1c3dd49ce99cac83dd9aed2ad20b1717f9f91 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 22 Feb 2020 06:57:40 -0500 Subject: [PATCH 5/9] Test OverflowError in __rsub__ --- xarray/coding/cftimeindex.py | 6 ++++-- xarray/tests/test_cftimeindex.py | 8 ++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 6c19d2b87a4..fd185124d03 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -470,8 +470,10 @@ def __rsub__(self, other): try: return pd.TimedeltaIndex(other - np.array(self)) except OverflowError: - # TODO: add a warning here - return pd.Index(np.array(other) - np.array(self), dtype="O") + raise ValueError( + "The time difference exceeds the range of values " + "that can be expressed at the nanosecond resolution." + ) def to_datetimeindex(self, unsafe=False): """If possible, convert this index to a pandas.DatetimeIndex. diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index e1dc5f7b1a0..725177a41d4 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -802,6 +802,14 @@ def test_cftime_datetime_sub_cftimeindex(calendar): assert isinstance(result, pd.TimedeltaIndex) +@requires_cftime +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) +def test_distant_cftime_datetime_sub_cftimeindex(calendar): + a = xr.cftime_range("2000", periods=5, calendar=calendar) + with pytest.raises(ValueError, match="difference exceeds"): + a.date_type(1, 1, 1) - a + + @requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) def test_cftimeindex_sub_timedeltaindex(calendar): From 1f573a3de49bf9d4cd551afa4142020351f2ea0c Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 22 Feb 2020 08:11:34 -0500 Subject: [PATCH 6/9] Fix name of pandas method in docstring --- xarray/coding/cftimeindex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index fd185124d03..c6e34991321 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -345,7 +345,7 @@ def _get_nearest_indexer(self, target, limit, tolerance): return indexer def _filter_indexer_tolerance(self, target, indexer, tolerance): - """Copied from pandas.Index._get_filter_tolerance""" + """Copied from pandas.Index._filter_indexer_tolerance""" distance = abs(self.values[indexer] - target) indexer = np.where(distance <= tolerance, indexer, -1) return indexer From c91e6252d0ee659d6b6693ddb718f8942d722f12 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 22 Feb 2020 08:22:37 -0500 Subject: [PATCH 7/9] Add what's new entries --- doc/whats-new.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e3e4eca7a01..764b03bbe61 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -40,6 +40,12 @@ Internal Changes - Changed test_open_mfdataset_list_attr to only run with dask installed (:issue:`3777`, :pull:`3780`). By `Bruno Pagani `_. +- Preserved the ability to index with ``method="nearest"`` with a + :py:class:`CFTimeIndex` with pandas versions greater than 1.0.1 + (:issue:`3751`). By `Spencer Clark `_. +- Greater flexibility and improved test coverage of subtracting various types + of objects from a :py:class:`CFTimeIndex`. By `Spencer Clark + `_. .. _whats-new.0.15.0: From 149651370d334d652879d9ec228a6325394ebd87 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 22 Feb 2020 08:34:14 -0500 Subject: [PATCH 8/9] Enable use of tolerance greater than 292 years --- xarray/coding/cftimeindex.py | 7 +++++-- xarray/tests/test_cftimeindex.py | 6 +++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index c6e34991321..3ed50c5b88f 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -345,8 +345,11 @@ def _get_nearest_indexer(self, target, limit, tolerance): return indexer def _filter_indexer_tolerance(self, target, indexer, tolerance): - """Copied from pandas.Index._filter_indexer_tolerance""" - distance = abs(self.values[indexer] - target) + """Adapted from pandas.Index._filter_indexer_tolerance""" + if isinstance(target, pd.Index): + distance = abs(self.values[indexer] - target.values) + else: + distance = abs(self.values[indexer] - target) indexer = np.where(distance <= tolerance, indexer, -1) return indexer diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 725177a41d4..06f056da757 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -460,7 +460,11 @@ def test_sel_date_distant_date(da, date_type, index): @requires_cftime @pytest.mark.parametrize( "sel_kwargs", - [{"method": "nearest"}, {"method": "nearest", "tolerance": timedelta(days=70)}], + [ + {"method": "nearest"}, + {"method": "nearest", "tolerance": timedelta(days=70)}, + {"method": "nearest", "tolerance": timedelta(days=1800000)}, + ], ) def test_sel_date_scalar_nearest(da, date_type, index, sel_kwargs): expected = xr.DataArray(2).assign_coords(time=index[1]) From ec4e19f44ff587628bea3d9f7b1d2b7166d8cb80 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 9 Mar 2020 07:37:40 +0000 Subject: [PATCH 9/9] newlinw --- xarray/coding/cftimeindex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index ed5ccfe8944..1ea5d3a7d11 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -726,4 +726,4 @@ def _round_to_nearest_half_even(values, unit): remainder > (unit // 2), np.logical_and(remainder == (unit // 2), quotient % 2) ) quotient[mask] += 1 - return quotient * unit \ No newline at end of file + return quotient * unit