diff --git a/ci/azure/install.yml b/ci/azure/install.yml
index fee886ba804..baa69bcc8d5 100644
--- a/ci/azure/install.yml
+++ b/ci/azure/install.yml
@@ -16,9 +16,9 @@ steps:
--pre \
--upgrade \
matplotlib \
+ numpy \
pandas \
scipy
- # numpy \ # FIXME https://github.com/pydata/xarray/issues/3409
pip install \
--no-deps \
--upgrade \
diff --git a/ci/requirements/py36.yml b/ci/requirements/py36.yml
index 10fe69253e8..820160b19cc 100644
--- a/ci/requirements/py36.yml
+++ b/ci/requirements/py36.yml
@@ -25,7 +25,7 @@ dependencies:
- nc-time-axis
- netcdf4
- numba
- - numpy<1.18 # FIXME https://github.com/pydata/xarray/issues/3409
+ - numpy
- pandas
- pint
- pip
diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml
index 827c664a222..4a7aaf7d32b 100644
--- a/ci/requirements/py37.yml
+++ b/ci/requirements/py37.yml
@@ -25,7 +25,7 @@ dependencies:
- nc-time-axis
- netcdf4
- numba
- - numpy<1.18 # FIXME https://github.com/pydata/xarray/issues/3409
+ - numpy
- pandas
- pint
- pip
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index cb274bcaee8..0c929b5b711 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -115,6 +115,12 @@ Bug fixes
(:issue:`3402`). By `Deepak Cherian `_
- Allow appending datetime and bool data variables to zarr stores.
(:issue:`3480`). By `Akihiro Matsukawa `_.
+- Add support for numpy >=1.18; fix mean() on datetime64 arrays on the dask backend
+ (:issue:`3409`, :pull:`3537`). By `Guido Imperiale `_.
+- Add support for pandas >=0.26 (:issue:`3440`).
+ By `Deepak Cherian `_.
+- Add support for pseudonetcdf >=3.1 (:pull:`3485`).
+ By `Barron Henderson `_.
Documentation
~~~~~~~~~~~~~
@@ -133,7 +139,6 @@ Documentation
Internal Changes
~~~~~~~~~~~~~~~~
-
- Added integration tests against `pint `_.
(:pull:`3238`, :pull:`3447`, :pull:`3493`, :pull:`3508`)
by `Justus Magin `_.
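
A minimal sketch of the behaviour described by the first new entry above, assuming this patch is applied and that numpy, pandas and (optionally) dask are installed; the data values are illustrative only:

import numpy as np
import xarray as xr

# mean() over datetime64 data containing NaT: NaT is skipped by default
da = xr.DataArray(
    np.array(["2010-01-01", "NaT", "2010-01-03"], dtype="datetime64[ns]"),
    dims="time",
)
print(da.mean())              # 2010-01-02
print(da.mean(skipna=False))  # NaT

# with the fix, the same call also works when the array is dask-backed:
# da.chunk({"time": 2}).mean().compute()
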
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 5de254614ff..c631a4c11ea 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -5316,7 +5316,9 @@ def _integrate_one(self, coord, datetime_unit=None):
datetime_unit, _ = np.datetime_data(coord_var.dtype)
elif datetime_unit is None:
datetime_unit = "s" # Default to seconds for cftime objects
- coord_var = datetime_to_numeric(coord_var, datetime_unit=datetime_unit)
+ coord_var = coord_var._replace(
+ data=datetime_to_numeric(coord_var.data, datetime_unit=datetime_unit)
+ )
variables = {}
coord_names = set()
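
The hunk above keeps coord_var a Variable and only swaps in the numeric data, so its dims and attrs survive the conversion. A hedged sketch of the public path that reaches _integrate_one with a datetime coordinate, assuming the patch is applied (values are illustrative):

import numpy as np
import xarray as xr

time = np.array(["2010-01-01", "2010-01-03", "2010-01-07"], dtype="datetime64[ns]")
ds = xr.Dataset({"var": ("time", [0.0, 2.0, 4.0])}, coords={"time": time})

# the datetime coordinate is converted to floats (here: days) internally,
# then integrated with the trapezoidal rule along "time"
print(ds.integrate("time", datetime_unit="D"))
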
diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
index 71e79335c3d..cf616acb485 100644
--- a/xarray/core/duck_array_ops.py
+++ b/xarray/core/duck_array_ops.py
@@ -351,6 +351,26 @@ def f(values, axis=None, skipna=None, **kwargs):
_mean = _create_nan_agg_method("mean")
+def _datetime_nanmin(array):
+ """nanmin() function for datetime64.
+
+ Caveats that this function deals with:
+
+ - In numpy < 1.18, min() on datetime64 incorrectly ignores NaT
+ - numpy nanmin() does not work on datetime64 (all versions at the time of writing)
+ - dask min() does not work on datetime64 (all versions at the time of writing)
+ """
+ assert array.dtype.kind in "mM"
+ dtype = array.dtype
+ # (NaT).astype(float) does not produce NaN...
+ array = where(pandas_isnull(array), np.nan, array.astype(float))
+ array = min(array, skipna=True)
+ if isinstance(array, float):
+ array = np.array(array)
+ # ...but (NaN).astype("M8") does produce NaT
+ return array.astype(dtype)
+
+
def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
"""Convert an array containing datetime-like data to an array of floats.
@@ -370,7 +390,10 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
"""
# TODO: make this function dask-compatible?
if offset is None:
- offset = array.min()
+ if array.dtype.kind in "Mm":
+ offset = _datetime_nanmin(array)
+ else:
+ offset = min(array)
array = array - offset
if not hasattr(array, "dtype"): # scalar is converted to 0d-array
@@ -401,7 +424,8 @@ def mean(array, axis=None, skipna=None, **kwargs):
array = asarray(array)
if array.dtype.kind in "Mm":
- offset = min(array)
+ offset = _datetime_nanmin(array)
+
# xarray always uses np.datetime64[ns] for np.datetime64 data
dtype = "timedelta64[ns]"
return (
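
A standalone numpy/pandas sketch of the round trip that _datetime_nanmin relies on (no dask; values illustrative): NaT does not become NaN when cast to float, so it is masked explicitly first, while float NaN cast back to datetime64 does become NaT, so an all-NaT input still yields NaT:

import numpy as np
import pandas as pd

arr = np.array(["2010-01-01", "NaT", "2010-01-03"], dtype="datetime64[ns]")

# NaT.astype(float) yields the NaT sentinel value, not NaN, so mask NaT first
as_float = np.where(pd.isnull(arr), np.nan, arr.astype(float))
lowest = np.nanmin(as_float)  # NaN-skipping minimum on plain floats

# ...whereas float NaN cast to datetime64 does yield NaT, so the cast back
# preserves "all NaT in, NaT out"
print(np.array(lowest).astype(arr.dtype))  # 2010-01-01
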
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index be40ce7c6e8..de074da541f 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -5874,7 +5874,9 @@ def test_trapz_datetime(dask, which_datetime):
actual = da.integrate("time", datetime_unit="D")
expected_data = np.trapz(
- da, duck_array_ops.datetime_to_numeric(da["time"], datetime_unit="D"), axis=0
+ da.data,
+ duck_array_ops.datetime_to_numeric(da["time"].data, datetime_unit="D"),
+ axis=0,
)
expected = xr.DataArray(
expected_data,
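
The updated test passes raw .data to both np.trapz and datetime_to_numeric, since with this patch the helper is applied to plain arrays (the Variable wrapping is handled in _integrate_one). A hedged sketch of the helper on a bare datetime64 array; it is internal API, shown for illustration only:

import numpy as np
from xarray.core.duck_array_ops import datetime_to_numeric

time = np.array(["2010-01-01", "2010-01-03", "2010-01-07"], dtype="datetime64[ns]")
# offset defaults to the (NaT-aware) minimum; result is float days from it
print(datetime_to_numeric(time, datetime_unit="D"))  # [0. 2. 6.]
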
diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py
index f678af2fec5..aee7bbd6b11 100644
--- a/xarray/tests/test_duck_array_ops.py
+++ b/xarray/tests/test_duck_array_ops.py
@@ -274,23 +274,39 @@ def assert_dask_array(da, dask):
@arm_xfail
-@pytest.mark.parametrize("dask", [False, True])
-def test_datetime_reduce(dask):
- time = np.array(pd.date_range("15/12/1999", periods=11))
- time[8:11] = np.nan
- da = DataArray(np.linspace(0, 365, num=11), dims="time", coords={"time": time})
-
- if dask and has_dask:
- chunks = {"time": 5}
- da = da.chunk(chunks)
-
- actual = da["time"].mean()
- assert not pd.isnull(actual)
- actual = da["time"].mean(skipna=False)
- assert pd.isnull(actual)
-
- # test for a 0d array
- assert da["time"][0].mean() == da["time"][:1].mean()
+@pytest.mark.parametrize("dask", [False, True] if has_dask else [False])
+def test_datetime_mean(dask):
+ # Note: only testing numpy, as dask is broken upstream
+ da = DataArray(
+ np.array(["2010-01-01", "NaT", "2010-01-03", "NaT", "NaT"], dtype="M8"),
+ dims=["time"],
+ )
+ if dask:
+ # Trigger use case where a chunk is full of NaT
+ da = da.chunk({"time": 3})
+
+ expect = DataArray(np.array("2010-01-02", dtype="M8"))
+ expect_nat = DataArray(np.array("NaT", dtype="M8"))
+
+ actual = da.mean()
+ if dask:
+ assert actual.chunks is not None
+ assert_equal(actual, expect)
+
+ actual = da.mean(skipna=False)
+ if dask:
+ assert actual.chunks is not None
+ assert_equal(actual, expect_nat)
+
+ # tests for a 1d array full of NaT
+ assert_equal(da[[1]].mean(), expect_nat)
+ assert_equal(da[[1]].mean(skipna=False), expect_nat)
+
+ # tests for a 0d array
+ assert_equal(da[0].mean(), da[0])
+ assert_equal(da[0].mean(skipna=False), da[0])
+ assert_equal(da[1].mean(), expect_nat)
+ assert_equal(da[1].mean(skipna=False), expect_nat)
@requires_cftime