Fix indexing with datetime64[ns] with pandas=1.1 (#4292)

shoyer · keewis · max-sixty · web-flow · commit 59f57f3e410a · 2020-09-15T18:33:29.000-07:00
* Fix indexing with datetime64[ns] with pandas=1.1. Fixes #4283. The underlying issue is that calling `.item()` on a NumPy array with `dtype=datetime64[ns]` returns an _integer_, rather than an `np.datetime64` scalar. This is somewhat baffling but works this way because `.item()` returns native Python types, but `datetime.datetime` doesn't support nanosecond precision. `pandas.Index.get_loc` used to support these integers, but now is more strict. Hence we get errors. We can fix this by using `array[()]` to convert 0d arrays into NumPy scalars instead of calling `array.item()`. I've added a crude regression test. There may well be a better way to test this but I haven't figured it out yet. * lint fix * add a test checking the datetime indexer * use label.item() for non-datetime / timedelta labels * unpin pandas in the docs * ignore the future warning about deprecated arguments to pandas.Grouper * Update xarray/core/indexing.py Co-authored-by: keewis <keewis@users.noreply.github.com> * Add whatsnew note Co-authored-by: Keewis <keewis@posteo.de> Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Co-authored-by: keewis <keewis@users.noreply.github.com>
diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml
@@ -18,9 +18,7 @@ dependencies:
   - netcdf4>=1.5
   - numba
   - numpy>=1.17
-  # FIXME https://github.com/pydata/xarray/issues/4287
-  # - pandas>=1.0
-  - pandas=1.0
+  - pandas>=1.0
   - rasterio>=1.1
   - seaborn
   - setuptools
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -86,6 +86,9 @@ Bug fixes
   By `Jens Svensmark <https://github.com/jenssss>`_
 - Fix incorrect legend labels for :py:meth:`Dataset.plot.scatter` (:issue:`4126`).
   By `Peter Hausamann <https://github.com/phausamann>`_.
+- Fix indexing with datetime64 scalars with pandas 1.1 (:issue:`4283`).
+  By `Stephan Hoyer <https://github.com/shoyer>`_ and
+  `Justus Magin <https://github.com/keewis>`_.
 
 Documentation
 ~~~~~~~~~~~~~
diff --git a/xarray/core/common.py b/xarray/core/common.py
@@ -1126,14 +1126,22 @@ def resample(
         dim_name = dim
         dim_coord = self[dim]
 
-        if isinstance(self.indexes[dim_name], CFTimeIndex):
-            from .resample_cftime import CFTimeGrouper
-
-            grouper = CFTimeGrouper(freq, closed, label, base, loffset)
-        else:
-            grouper = pd.Grouper(
-                freq=freq, closed=closed, label=label, base=base, loffset=loffset
+        # TODO: remove once pandas=1.1 is the minimum required version
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                "ignore",
+                r"'(base|loffset)' in .resample\(\) and in Grouper\(\) is deprecated.",
+                category=FutureWarning,
             )
+
+            if isinstance(self.indexes[dim_name], CFTimeIndex):
+                from .resample_cftime import CFTimeGrouper
+
+                grouper = CFTimeGrouper(freq, closed, label, base, loffset)
+            else:
+                grouper = pd.Grouper(
+                    freq=freq, closed=closed, label=label, base=base, loffset=loffset
+                )
         group = DataArray(
             dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM
         )
diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py
@@ -178,8 +178,10 @@ def convert_label_indexer(index, label, index_name="", method=None, tolerance=No
             else _asarray_tuplesafe(label)
         )
         if label.ndim == 0:
+            # see https://github.com/pydata/xarray/pull/4292 for details
+            label_value = label[()] if label.dtype.kind in "mM" else label.item()
             if isinstance(index, pd.MultiIndex):
-                indexer, new_index = index.get_loc_level(label.item(), level=0)
+                indexer, new_index = index.get_loc_level(label_value, level=0)
             elif isinstance(index, pd.CategoricalIndex):
                 if method is not None:
                     raise ValueError(
@@ -189,11 +191,9 @@ def convert_label_indexer(index, label, index_name="", method=None, tolerance=No
                     raise ValueError(
                         "'tolerance' is not a valid kwarg when indexing using a CategoricalIndex."
                     )
-                indexer = index.get_loc(label.item())
+                indexer = index.get_loc(label_value)
             else:
-                indexer = index.get_loc(
-                    label.item(), method=method, tolerance=tolerance
-                )
+                indexer = index.get_loc(label_value, method=method, tolerance=tolerance)
         elif label.dtype.kind == "b":
             indexer = label
         else:
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
@@ -938,7 +938,7 @@ def test_sel_invalid_slice(self):
         with raises_regex(ValueError, "cannot use non-scalar arrays"):
             array.sel(x=slice(array.x))
 
-    def test_sel_dataarray_datetime(self):
+    def test_sel_dataarray_datetime_slice(self):
         # regression test for GH1240
         times = pd.date_range("2000-01-01", freq="D", periods=365)
         array = DataArray(np.arange(365), [("time", times)])
@@ -1078,6 +1078,12 @@ def test_loc(self):
         assert_identical(da[:3, :4], da.loc[["a", "b", "c"], np.arange(4)])
         assert_identical(da[:, :4], da.loc[:, self.ds["y"] < 4])
 
+    def test_loc_datetime64_value(self):
+        # regression test for https://github.com/pydata/xarray/issues/4283
+        t = np.array(["2017-09-05T12", "2017-09-05T15"], dtype="datetime64[ns]")
+        array = DataArray(np.ones(t.shape), dims=("time",), coords=(t,))
+        assert_identical(array.loc[{"time": t[0]}], array[0])
+
     def test_loc_assign(self):
         self.ds["x"] = ("x", np.array(list("abcdefghij")))
         da = self.ds["foo"]
diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py
@@ -86,6 +86,15 @@ def test_convert_label_indexer(self):
         with pytest.raises(IndexError):
             indexing.convert_label_indexer(mindex, (slice(None), 1, "no_level"))
 
+    def test_convert_label_indexer_datetime(self):
+        index = pd.to_datetime(["2000-01-01", "2001-01-01", "2002-01-01"])
+        actual = indexing.convert_label_indexer(index, "2001-01-01")
+        expected = (1, None)
+        assert actual == expected
+
+        actual = indexing.convert_label_indexer(index, index.to_numpy()[1])
+        assert actual == expected
+
     def test_convert_unsorted_datetime_index_raises(self):
         index = pd.to_datetime(["2001", "2000", "2002"])
         with pytest.raises(KeyError):