Skip to content

Commit 59f57f3

Browse files
shoyerkeewismax-sixty
authored
Fix indexing with datetime64[ns] with pandas=1.1 (#4292)
* Fix indexing with datetime64[ns] with pandas=1.1 Fixes #4283 The underlying issue is that calling `.item()` on a NumPy array with `dtype=datetime64[ns]` returns an _integer_, rather than an `np.datetime64` scalar. This is somewhat baffling but works this way because `.item()` returns native Python types, but `datetime.datetime` doesn't support nanosecond precision. `pandas.Index.get_loc` used to support these integers, but now is more strict. Hence we get errors. We can fix this by using `array[()]` to convert 0d arrays into NumPy scalars instead of calling `array.item()`. I've added a crude regression test. There may well be a better way to test this but I haven't figured it out yet. * lint fix * add a test checking the datetime indexer * use label.item() for non-datetime / timedelta labels * unpin pandas in the docs * ignore the future warning about deprecated arguments to pandas.Grouper * Update xarray/core/indexing.py Co-authored-by: keewis <keewis@users.noreply.github.com> * Add whatsnew note Co-authored-by: Keewis <keewis@posteo.de> Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Co-authored-by: keewis <keewis@users.noreply.github.com>
1 parent 66ab0ae commit 59f57f3

File tree

6 files changed

+40
-16
lines changed

6 files changed

+40
-16
lines changed

ci/requirements/doc.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,7 @@ dependencies:
1818
- netcdf4>=1.5
1919
- numba
2020
- numpy>=1.17
21-
# FIXME https://github.com/pydata/xarray/issues/4287
22-
# - pandas>=1.0
23-
- pandas=1.0
21+
- pandas>=1.0
2422
- rasterio>=1.1
2523
- seaborn
2624
- setuptools

doc/whats-new.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,9 @@ Bug fixes
8686
By `Jens Svensmark <https://github.com/jenssss>`_
8787
- Fix incorrect legend labels for :py:meth:`Dataset.plot.scatter` (:issue:`4126`).
8888
By `Peter Hausamann <https://github.com/phausamann>`_.
89+
- Fix indexing with datetime64 scalars with pandas 1.1 (:issue:`4283`).
90+
By `Stephan Hoyer <https://github.com/shoyer>`_ and
91+
`Justus Magin <https://github.com/keewis>`_.
8992

9093
Documentation
9194
~~~~~~~~~~~~~

xarray/core/common.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1126,14 +1126,22 @@ def resample(
11261126
dim_name = dim
11271127
dim_coord = self[dim]
11281128

1129-
if isinstance(self.indexes[dim_name], CFTimeIndex):
1130-
from .resample_cftime import CFTimeGrouper
1131-
1132-
grouper = CFTimeGrouper(freq, closed, label, base, loffset)
1133-
else:
1134-
grouper = pd.Grouper(
1135-
freq=freq, closed=closed, label=label, base=base, loffset=loffset
1129+
# TODO: remove once pandas=1.1 is the minimum required version
1130+
with warnings.catch_warnings():
1131+
warnings.filterwarnings(
1132+
"ignore",
1133+
r"'(base|loffset)' in .resample\(\) and in Grouper\(\) is deprecated.",
1134+
category=FutureWarning,
11361135
)
1136+
1137+
if isinstance(self.indexes[dim_name], CFTimeIndex):
1138+
from .resample_cftime import CFTimeGrouper
1139+
1140+
grouper = CFTimeGrouper(freq, closed, label, base, loffset)
1141+
else:
1142+
grouper = pd.Grouper(
1143+
freq=freq, closed=closed, label=label, base=base, loffset=loffset
1144+
)
11371145
group = DataArray(
11381146
dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM
11391147
)

xarray/core/indexing.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -178,8 +178,10 @@ def convert_label_indexer(index, label, index_name="", method=None, tolerance=No
178178
else _asarray_tuplesafe(label)
179179
)
180180
if label.ndim == 0:
181+
# see https://github.com/pydata/xarray/pull/4292 for details
182+
label_value = label[()] if label.dtype.kind in "mM" else label.item()
181183
if isinstance(index, pd.MultiIndex):
182-
indexer, new_index = index.get_loc_level(label.item(), level=0)
184+
indexer, new_index = index.get_loc_level(label_value, level=0)
183185
elif isinstance(index, pd.CategoricalIndex):
184186
if method is not None:
185187
raise ValueError(
@@ -189,11 +191,9 @@ def convert_label_indexer(index, label, index_name="", method=None, tolerance=No
189191
raise ValueError(
190192
"'tolerance' is not a valid kwarg when indexing using a CategoricalIndex."
191193
)
192-
indexer = index.get_loc(label.item())
194+
indexer = index.get_loc(label_value)
193195
else:
194-
indexer = index.get_loc(
195-
label.item(), method=method, tolerance=tolerance
196-
)
196+
indexer = index.get_loc(label_value, method=method, tolerance=tolerance)
197197
elif label.dtype.kind == "b":
198198
indexer = label
199199
else:

xarray/tests/test_dataarray.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -938,7 +938,7 @@ def test_sel_invalid_slice(self):
938938
with raises_regex(ValueError, "cannot use non-scalar arrays"):
939939
array.sel(x=slice(array.x))
940940

941-
def test_sel_dataarray_datetime(self):
941+
def test_sel_dataarray_datetime_slice(self):
942942
# regression test for GH1240
943943
times = pd.date_range("2000-01-01", freq="D", periods=365)
944944
array = DataArray(np.arange(365), [("time", times)])
@@ -1078,6 +1078,12 @@ def test_loc(self):
10781078
assert_identical(da[:3, :4], da.loc[["a", "b", "c"], np.arange(4)])
10791079
assert_identical(da[:, :4], da.loc[:, self.ds["y"] < 4])
10801080

1081+
def test_loc_datetime64_value(self):
1082+
# regression test for https://github.com/pydata/xarray/issues/4283
1083+
t = np.array(["2017-09-05T12", "2017-09-05T15"], dtype="datetime64[ns]")
1084+
array = DataArray(np.ones(t.shape), dims=("time",), coords=(t,))
1085+
assert_identical(array.loc[{"time": t[0]}], array[0])
1086+
10811087
def test_loc_assign(self):
10821088
self.ds["x"] = ("x", np.array(list("abcdefghij")))
10831089
da = self.ds["foo"]

xarray/tests/test_indexing.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,15 @@ def test_convert_label_indexer(self):
8686
with pytest.raises(IndexError):
8787
indexing.convert_label_indexer(mindex, (slice(None), 1, "no_level"))
8888

89+
def test_convert_label_indexer_datetime(self):
90+
index = pd.to_datetime(["2000-01-01", "2001-01-01", "2002-01-01"])
91+
actual = indexing.convert_label_indexer(index, "2001-01-01")
92+
expected = (1, None)
93+
assert actual == expected
94+
95+
actual = indexing.convert_label_indexer(index, index.to_numpy()[1])
96+
assert actual == expected
97+
8998
def test_convert_unsorted_datetime_index_raises(self):
9099
index = pd.to_datetime(["2001", "2000", "2002"])
91100
with pytest.raises(KeyError):

0 commit comments

Comments
 (0)