Skip to content

Commit

Permalink
Numpy 1.18 support (#3537)
Browse files Browse the repository at this point in the history
* Closes #3409

* Unpin versions

* Rewrite unit test for clarity about its real scope

* mean() on dask

* Trivial

* duck_array_ops should never receive xarray.Variable
  • Loading branch information
crusaderky authored Nov 19, 2019
1 parent 980a1d2 commit 45fd0e6
Show file tree
Hide file tree
Showing 8 changed files with 74 additions and 25 deletions.
2 changes: 1 addition & 1 deletion ci/azure/install.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ steps:
--pre \
--upgrade \
matplotlib \
numpy \
pandas \
scipy
# numpy \ # FIXME https://github.com/pydata/xarray/issues/3409
pip install \
--no-deps \
--upgrade \
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements/py36.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ dependencies:
- nc-time-axis
- netcdf4
- numba
- numpy<1.18 # FIXME https://github.com/pydata/xarray/issues/3409
- numpy
- pandas
- pint
- pip
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements/py37.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ dependencies:
- nc-time-axis
- netcdf4
- numba
- numpy<1.18 # FIXME https://github.com/pydata/xarray/issues/3409
- numpy
- pandas
- pint
- pip
Expand Down
7 changes: 6 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,12 @@ Bug fixes
(:issue:`3402`). By `Deepak Cherian <https://github.com/dcherian/>`_
- Allow appending datetime and bool data variables to zarr stores.
(:issue:`3480`). By `Akihiro Matsukawa <https://github.com/amatsukawa/>`_.
- Add support for numpy >=1.18; fix mean() on datetime64 arrays on the dask backend
(:issue:`3409`, :pull:`3537`). By `Guido Imperiale <https://github.com/crusaderky>`_.
- Add support for pandas >=0.26 (:issue:`3440`).
By `Deepak Cherian <https://github.com/dcherian>`_.
- Add support for pseudonetcdf >=3.1 (:pull:`3485`).
By `Barron Henderson <https://github.com/barronh>`_.

Documentation
~~~~~~~~~~~~~
Expand All @@ -133,7 +139,6 @@ Documentation

Internal Changes
~~~~~~~~~~~~~~~~

- Added integration tests against `pint <https://pint.readthedocs.io/>`_.
(:pull:`3238`, :pull:`3447`, :pull:`3493`, :pull:`3508`)
by `Justus Magin <https://github.com/keewis>`_.
Expand Down
4 changes: 3 additions & 1 deletion xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5316,7 +5316,9 @@ def _integrate_one(self, coord, datetime_unit=None):
datetime_unit, _ = np.datetime_data(coord_var.dtype)
elif datetime_unit is None:
datetime_unit = "s" # Default to seconds for cftime objects
coord_var = datetime_to_numeric(coord_var, datetime_unit=datetime_unit)
coord_var = coord_var._replace(
data=datetime_to_numeric(coord_var.data, datetime_unit=datetime_unit)
)

variables = {}
coord_names = set()
Expand Down
28 changes: 26 additions & 2 deletions xarray/core/duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,26 @@ def f(values, axis=None, skipna=None, **kwargs):
_mean = _create_nan_agg_method("mean")


def _datetime_nanmin(array):
    """nanmin() function for datetime64.

    Caveats that this function deals with:

    - In numpy < 1.18, min() on datetime64 incorrectly ignores NaT
    - numpy nanmin() doesn't work on datetime64 (all versions at the moment
      of writing)
    - dask min() does not work on datetime64 (all versions at the moment of
      writing)

    Parameters
    ----------
    array
        Array whose ``dtype.kind`` is ``"M"`` or ``"m"``; this is asserted,
        not validated with a raised exception.

    Returns
    -------
    The minimum of ``array`` with null entries skipped, cast back to the
    dtype of the input. If every entry is null the NaN result is cast back
    as well (which, per the note below, yields NaT for "M8").
    """
    # Internal invariant: callers are expected to dispatch on dtype kind first.
    assert array.dtype.kind in "mM"
    # Remember the original dtype so the float result can be cast back.
    dtype = array.dtype
    # (NaT).astype(float) does not produce NaN...
    # ...so null positions are overwritten with NaN explicitly after the cast.
    array = where(pandas_isnull(array), np.nan, array.astype(float))
    # NOTE(review): `min` is called with skipna=True, so it is presumably the
    # module-level NaN-aware reduction rather than the builtin -- confirm.
    array = min(array, skipna=True)
    # A plain Python float has no .astype(); wrap it in a 0d array first.
    if isinstance(array, float):
        array = np.array(array)
    # ...but (NaN).astype("M8") does produce NaT
    return array.astype(dtype)


def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
"""Convert an array containing datetime-like data to an array of floats.
Expand All @@ -370,7 +390,10 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
"""
# TODO: make this function dask-compatible?
if offset is None:
offset = array.min()
if array.dtype.kind in "Mm":
offset = _datetime_nanmin(array)
else:
offset = min(array)
array = array - offset

if not hasattr(array, "dtype"): # scalar is converted to 0d-array
Expand Down Expand Up @@ -401,7 +424,8 @@ def mean(array, axis=None, skipna=None, **kwargs):

array = asarray(array)
if array.dtype.kind in "Mm":
offset = min(array)
offset = _datetime_nanmin(array)

# xarray always uses np.datetime64[ns] for np.datetime64 data
dtype = "timedelta64[ns]"
return (
Expand Down
4 changes: 3 additions & 1 deletion xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5874,7 +5874,9 @@ def test_trapz_datetime(dask, which_datetime):

actual = da.integrate("time", datetime_unit="D")
expected_data = np.trapz(
da, duck_array_ops.datetime_to_numeric(da["time"], datetime_unit="D"), axis=0
da.data,
duck_array_ops.datetime_to_numeric(da["time"].data, datetime_unit="D"),
axis=0,
)
expected = xr.DataArray(
expected_data,
Expand Down
50 changes: 33 additions & 17 deletions xarray/tests/test_duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,23 +274,39 @@ def assert_dask_array(da, dask):


@arm_xfail
@pytest.mark.parametrize("dask", [False, True])
def test_datetime_reduce(dask):
    """Check mean() on a datetime coordinate whose last entries are null.

    Verifies that the default mean() is not null, that mean(skipna=False) is
    null, and that the mean of a 0d selection equals the mean of the
    corresponding one-element slice.
    """
    # 11 consecutive dates; the last three are overwritten with a null value.
    time = np.array(pd.date_range("15/12/1999", periods=11))
    time[8:11] = np.nan
    da = DataArray(np.linspace(0, 365, num=11), dims="time", coords={"time": time})

    # Chunking is only applied when dask is actually installed; otherwise the
    # dask=True case runs on the same unchunked data as dask=False.
    if dask and has_dask:
        chunks = {"time": 5}
        da = da.chunk(chunks)

    actual = da["time"].mean()
    assert not pd.isnull(actual)
    actual = da["time"].mean(skipna=False)
    assert pd.isnull(actual)

    # test for a 0d array
    assert da["time"][0].mean() == da["time"][:1].mean()
@pytest.mark.parametrize("dask", [False, True] if has_dask else [False])
def test_datetime_mean(dask):
    """Check mean() on a datetime64 DataArray that contains NaT values.

    Covers the default and skipna=False reductions on the full array, on a
    one-element array holding only NaT, and on 0d selections. When ``dask``
    is true the data is chunked and the full-array results are checked to
    still carry chunks.
    """
    # Runs on numpy-backed data and, when dask is installed, on dask-backed
    # data as well (see the parametrization above).
    da = DataArray(
        np.array(["2010-01-01", "NaT", "2010-01-03", "NaT", "NaT"], dtype="M8"),
        dims=["time"],
    )
    if dask:
        # Trigger use case where a chunk is full of NaT
        # (chunks of 3 split the 5 values so the last two, both NaT, form
        # their own chunk)
        da = da.chunk({"time": 3})

    # Mean of the two valid dates, and the all-null result, as 0d arrays.
    expect = DataArray(np.array("2010-01-02", dtype="M8"))
    expect_nat = DataArray(np.array("NaT", dtype="M8"))

    actual = da.mean()
    if dask:
        # The reduction result is expected to still be chunked.
        assert actual.chunks is not None
    assert_equal(actual, expect)

    actual = da.mean(skipna=False)
    if dask:
        assert actual.chunks is not None
    assert_equal(actual, expect_nat)

    # tests for 1d array full of NaT
    assert_equal(da[[1]].mean(), expect_nat)
    assert_equal(da[[1]].mean(skipna=False), expect_nat)

    # tests for a 0d array
    assert_equal(da[0].mean(), da[0])
    assert_equal(da[0].mean(skipna=False), da[0])
    assert_equal(da[1].mean(), expect_nat)
    assert_equal(da[1].mean(skipna=False), expect_nat)


@requires_cftime
Expand Down

0 comments on commit 45fd0e6

Please sign in to comment.