Skip to content

Commit

Permalink
Fix indexing with datetime64[ns] with pandas=1.1 (#4292)
Browse files Browse the repository at this point in the history
* Fix indexing with datetime64[ns] with pandas=1.1

Fixes #4283

The underlying issue is that calling `.item()` on a NumPy array with
`dtype=datetime64[ns]` returns an _integer_, rather than an `np.datetime64`
scalar. This is somewhat baffling, but it works this way because `.item()`
returns native Python types, and `datetime.datetime` doesn't support
nanosecond precision.

`pandas.Index.get_loc` used to support these integers, but is now stricter
(as of pandas 1.1). Hence we get errors.

We can fix this by using `array[()]` to convert 0d arrays into NumPy scalars
instead of calling `array.item()`.

I've added a crude regression test. There may well be a better way to test this
but I haven't figured it out yet.

* lint fix

* add a test checking the datetime indexer

* use label.item() for non-datetime / timedelta labels

* unpin pandas in the docs

* ignore the future warning about deprecated arguments to pandas.Grouper

* Update xarray/core/indexing.py

Co-authored-by: keewis <keewis@users.noreply.github.com>

* Add whatsnew note

Co-authored-by: Keewis <keewis@posteo.de>
Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Co-authored-by: keewis <keewis@users.noreply.github.com>
  • Loading branch information
4 people authored Sep 16, 2020
1 parent 66ab0ae commit 59f57f3
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 16 deletions.
4 changes: 1 addition & 3 deletions ci/requirements/doc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@ dependencies:
- netcdf4>=1.5
- numba
- numpy>=1.17
# FIXME https://github.com/pydata/xarray/issues/4287
# - pandas>=1.0
- pandas=1.0
- pandas>=1.0
- rasterio>=1.1
- seaborn
- setuptools
Expand Down
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ Bug fixes
By `Jens Svensmark <https://github.com/jenssss>`_
- Fix incorrect legend labels for :py:meth:`Dataset.plot.scatter` (:issue:`4126`).
By `Peter Hausamann <https://github.com/phausamann>`_.
- Fix indexing with datetime64 scalars with pandas 1.1 (:issue:`4283`).
By `Stephan Hoyer <https://github.com/shoyer>`_ and
`Justus Magin <https://github.com/keewis>`_.

Documentation
~~~~~~~~~~~~~
Expand Down
22 changes: 15 additions & 7 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1126,14 +1126,22 @@ def resample(
dim_name = dim
dim_coord = self[dim]

if isinstance(self.indexes[dim_name], CFTimeIndex):
from .resample_cftime import CFTimeGrouper

grouper = CFTimeGrouper(freq, closed, label, base, loffset)
else:
grouper = pd.Grouper(
freq=freq, closed=closed, label=label, base=base, loffset=loffset
# TODO: remove once pandas=1.1 is the minimum required version
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
r"'(base|loffset)' in .resample\(\) and in Grouper\(\) is deprecated.",
category=FutureWarning,
)

if isinstance(self.indexes[dim_name], CFTimeIndex):
from .resample_cftime import CFTimeGrouper

grouper = CFTimeGrouper(freq, closed, label, base, loffset)
else:
grouper = pd.Grouper(
freq=freq, closed=closed, label=label, base=base, loffset=loffset
)
group = DataArray(
dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM
)
Expand Down
10 changes: 5 additions & 5 deletions xarray/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,10 @@ def convert_label_indexer(index, label, index_name="", method=None, tolerance=No
else _asarray_tuplesafe(label)
)
if label.ndim == 0:
# see https://github.com/pydata/xarray/pull/4292 for details
label_value = label[()] if label.dtype.kind in "mM" else label.item()
if isinstance(index, pd.MultiIndex):
indexer, new_index = index.get_loc_level(label.item(), level=0)
indexer, new_index = index.get_loc_level(label_value, level=0)
elif isinstance(index, pd.CategoricalIndex):
if method is not None:
raise ValueError(
Expand All @@ -189,11 +191,9 @@ def convert_label_indexer(index, label, index_name="", method=None, tolerance=No
raise ValueError(
"'tolerance' is not a valid kwarg when indexing using a CategoricalIndex."
)
indexer = index.get_loc(label.item())
indexer = index.get_loc(label_value)
else:
indexer = index.get_loc(
label.item(), method=method, tolerance=tolerance
)
indexer = index.get_loc(label_value, method=method, tolerance=tolerance)
elif label.dtype.kind == "b":
indexer = label
else:
Expand Down
8 changes: 7 additions & 1 deletion xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -938,7 +938,7 @@ def test_sel_invalid_slice(self):
with raises_regex(ValueError, "cannot use non-scalar arrays"):
array.sel(x=slice(array.x))

def test_sel_dataarray_datetime(self):
def test_sel_dataarray_datetime_slice(self):
# regression test for GH1240
times = pd.date_range("2000-01-01", freq="D", periods=365)
array = DataArray(np.arange(365), [("time", times)])
Expand Down Expand Up @@ -1078,6 +1078,12 @@ def test_loc(self):
assert_identical(da[:3, :4], da.loc[["a", "b", "c"], np.arange(4)])
assert_identical(da[:, :4], da.loc[:, self.ds["y"] < 4])

def test_loc_datetime64_value(self):
    # regression test for https://github.com/pydata/xarray/issues/4283
    # Selecting via .loc with a label taken straight from the datetime64[ns]
    # coordinate array must return the same result as positional indexing.
    timestamps = np.array(
        ["2017-09-05T12", "2017-09-05T15"], dtype="datetime64[ns]"
    )
    values = np.ones(timestamps.shape)
    da = DataArray(values, dims=("time",), coords=(timestamps,))

    selected = da.loc[{"time": timestamps[0]}]
    assert_identical(selected, da[0])

def test_loc_assign(self):
self.ds["x"] = ("x", np.array(list("abcdefghij")))
da = self.ds["foo"]
Expand Down
9 changes: 9 additions & 0 deletions xarray/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,15 @@ def test_convert_label_indexer(self):
with pytest.raises(IndexError):
indexing.convert_label_indexer(mindex, (slice(None), 1, "no_level"))

def test_convert_label_indexer_datetime(self):
    # A scalar label on a datetime index must resolve to the same result
    # whether it is given as a string or as an element of index.to_numpy().
    index = pd.to_datetime(["2000-01-01", "2001-01-01", "2002-01-01"])
    expected = (1, None)

    from_string = indexing.convert_label_indexer(index, "2001-01-01")
    assert from_string == expected

    numpy_label = index.to_numpy()[1]
    from_numpy_value = indexing.convert_label_indexer(index, numpy_label)
    assert from_numpy_value == expected

def test_convert_unsorted_datetime_index_raises(self):
index = pd.to_datetime(["2001", "2000", "2002"])
with pytest.raises(KeyError):
Expand Down

0 comments on commit 59f57f3

Please sign in to comment.