From 8a148b69f36f9049e5eb31dc4ae87872369b005f Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 25 Nov 2019 07:57:18 -0800 Subject: [PATCH 01/28] add environment file for binderized examples (#3568) --- .binder/environment.yml | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 .binder/environment.yml diff --git a/.binder/environment.yml b/.binder/environment.yml new file mode 100644 index 00000000000..13b6b99e6fc --- /dev/null +++ b/.binder/environment.yml @@ -0,0 +1,39 @@ +name: xarray-examples +channels: + - conda-forge +dependencies: + - python=3.7 + - boto3 + - bottleneck + - cartopy + - cdms2 + - cfgrib + - cftime + - coveralls + - dask + - distributed + - dask_labextension + - h5netcdf + - h5py + - hdf5 + - iris + - lxml # Optional dep of pydap + - matplotlib + - nc-time-axis + - netcdf4 + - numba + - numpy + - pandas + - pint + - pip + - pydap + - pynio + - rasterio + - scipy + - seaborn + - sparse + - toolz + - xarray + - zarr + - pip: + - numbagg From 7dfdfcaa481fe6b42302f231cb02c35601c628db Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 25 Nov 2019 15:57:48 +0000 Subject: [PATCH 02/28] Reimplement quantile with apply_ufunc (#3559) * Reimplement quantile with apply_ufunc * Update xarray/core/variable.py Co-Authored-By: Stephan Hoyer * Update doc/whats-new.rst --- doc/whats-new.rst | 3 ++ xarray/core/dataset.py | 6 +--- xarray/core/variable.py | 63 ++++++++++++++++++---------------- xarray/tests/test_dataarray.py | 27 +++++++++------ xarray/tests/test_dataset.py | 28 ++++++++------- xarray/tests/test_variable.py | 33 +++++++++++------- 6 files changed, 91 insertions(+), 69 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1d239e18fcd..f811c7d15f0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,9 @@ Breaking changes New Features ~~~~~~~~~~~~ +- :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile` and ``GroupBy.quantile`` + now work with dask Variables. + By `Deepak Cherian `_. Bug fixes diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index fdddde773c1..1793dd2d94d 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5166,11 +5166,7 @@ def quantile( new = self._replace_with_new_dims( variables, coord_names=coord_names, attrs=attrs, indexes=indexes ) - if "quantile" in new.dims: - new.coords["quantile"] = Variable("quantile", q) - else: - new.coords["quantile"] = q - return new + return new.assign_coords(quantile=q) def rank(self, dim, pct=False, keep_attrs=None): """Ranks the data. diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 55e8f64d56c..041c303dd3a 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1716,40 +1716,45 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile, DataArray.quantile """ - if isinstance(self.data, dask_array_type): - raise TypeError( - "quantile does not work for arrays stored as dask " - "arrays. Load the data via .compute() or .load() " - "prior to calling this method." 
- ) - q = np.asarray(q, dtype=np.float64) - - new_dims = list(self.dims) - if dim is not None: - axis = self.get_axis_num(dim) - if utils.is_scalar(dim): - new_dims.remove(dim) - else: - for d in dim: - new_dims.remove(d) - else: - axis = None - new_dims = [] - - # Only add the quantile dimension if q is array-like - if q.ndim != 0: - new_dims = ["quantile"] + new_dims - - qs = np.nanpercentile( - self.data, q * 100.0, axis=axis, interpolation=interpolation - ) + from .computation import apply_ufunc if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) - attrs = self._attrs if keep_attrs else None - return Variable(new_dims, qs, attrs) + scalar = utils.is_scalar(q) + q = np.atleast_1d(np.asarray(q, dtype=np.float64)) + + if dim is None: + dim = self.dims + + if utils.is_scalar(dim): + dim = [dim] + + def _wrapper(npa, **kwargs): + # move quantile axis to end. required for apply_ufunc + return np.moveaxis(np.nanpercentile(npa, **kwargs), 0, -1) + + axis = np.arange(-1, -1 * len(dim) - 1, -1) + result = apply_ufunc( + _wrapper, + self, + input_core_dims=[dim], + exclude_dims=set(dim), + output_core_dims=[["quantile"]], + output_dtypes=[np.float64], + output_sizes={"quantile": len(q)}, + dask="parallelized", + kwargs={"q": q * 100, "axis": axis, "interpolation": interpolation}, + ) + + # for backward compatibility + result = result.transpose("quantile", ...) + if scalar: + result = result.squeeze("quantile") + if keep_attrs: + result.attrs = self._attrs + return result def rank(self, dim, pct=False): """Ranks the data. diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index ad98792372e..a1e34abd0d5 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -15,6 +15,8 @@ from xarray.core import dtypes from xarray.core.common import full_like from xarray.core.indexes import propagate_indexes +from xarray.core.utils import is_scalar + from xarray.tests import ( LooseVersion, ReturnItem, @@ -2330,17 +2332,20 @@ def test_reduce_out(self): with pytest.raises(TypeError): orig.mean(out=np.ones(orig.shape)) - def test_quantile(self): - for q in [0.25, [0.50], [0.25, 0.75]]: - for axis, dim in zip( - [None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]] - ): - actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True) - expected = np.nanpercentile( - self.dv.values, np.array(q) * 100, axis=axis - ) - np.testing.assert_allclose(actual.values, expected) - assert actual.attrs == self.attrs + @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) + @pytest.mark.parametrize( + "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) + ) + def test_quantile(self, q, axis, dim): + actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True) + expected = np.nanpercentile(self.dv.values, np.array(q) * 100, axis=axis) + np.testing.assert_allclose(actual.values, expected) + if is_scalar(q): + assert "quantile" not in actual.dims + else: + assert "quantile" in actual.dims + + assert actual.attrs == self.attrs def test_reduce_keep_attrs(self): # Test dropped attrs diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index e8fe768b783..d8282f58051 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -28,6 +28,7 @@ from xarray.core.common import duck_array_ops, full_like from xarray.core.npcompat import IS_NEP18_ACTIVE from xarray.core.pycompat import integer_types +from xarray.core.utils import is_scalar from . 
import ( InaccessibleArray, @@ -4575,21 +4576,24 @@ def test_reduce_keepdims(self): ) assert_identical(expected, actual) - def test_quantile(self): - + @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) + def test_quantile(self, q): ds = create_test_data(seed=123) - for q in [0.25, [0.50], [0.25, 0.75]]: - for dim in [None, "dim1", ["dim1"]]: - ds_quantile = ds.quantile(q, dim=dim) - assert "quantile" in ds_quantile - for var, dar in ds.data_vars.items(): - assert var in ds_quantile - assert_identical(ds_quantile[var], dar.quantile(q, dim=dim)) - dim = ["dim1", "dim2"] + for dim in [None, "dim1", ["dim1"]]: ds_quantile = ds.quantile(q, dim=dim) - assert "dim3" in ds_quantile.dims - assert all(d not in ds_quantile.dims for d in dim) + if is_scalar(q): + assert "quantile" not in ds_quantile.dims + else: + assert "quantile" in ds_quantile.dims + + for var, dar in ds.data_vars.items(): + assert var in ds_quantile + assert_identical(ds_quantile[var], dar.quantile(q, dim=dim)) + dim = ["dim1", "dim2"] + ds_quantile = ds.quantile(q, dim=dim) + assert "dim3" in ds_quantile.dims + assert all(d not in ds_quantile.dims for d in dim) @requires_bottleneck def test_rank(self): diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index ee8d54e567e..245dc1acc42 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -22,6 +22,7 @@ PandasIndexAdapter, VectorizedIndexer, ) +from xarray.core.pycompat import dask_array_type from xarray.core.utils import NDArrayMixin from xarray.core.variable import as_compatible_data, as_variable from xarray.tests import requires_bottleneck @@ -1492,23 +1493,31 @@ def test_reduce(self): with pytest.warns(DeprecationWarning, match="allow_lazy is deprecated"): v.mean(dim="x", allow_lazy=False) - def test_quantile(self): + @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) + @pytest.mark.parametrize( + "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) + ) + def test_quantile(self, q, axis, dim): v = Variable(["x", "y"], self.d) - for q in [0.25, [0.50], [0.25, 0.75]]: - for axis, dim in zip( - [None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]] - ): - actual = v.quantile(q, dim=dim) + actual = v.quantile(q, dim=dim) + expected = np.nanpercentile(self.d, np.array(q) * 100, axis=axis) + np.testing.assert_allclose(actual.values, expected) - expected = np.nanpercentile(self.d, np.array(q) * 100, axis=axis) - np.testing.assert_allclose(actual.values, expected) + @requires_dask + @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) + @pytest.mark.parametrize("axis, dim", [[1, "y"], [[1], ["y"]]]) + def test_quantile_dask(self, q, axis, dim): + v = Variable(["x", "y"], self.d).chunk({"x": 2}) + actual = v.quantile(q, dim=dim) + assert isinstance(actual.data, dask_array_type) + expected = np.nanpercentile(self.d, np.array(q) * 100, axis=axis) + np.testing.assert_allclose(actual.values, expected) @requires_dask - def test_quantile_dask_raises(self): - # regression for GH1524 - v = Variable(["x", "y"], self.d).chunk(2) + def test_quantile_chunked_dim_error(self): + v = Variable(["x", "y"], self.d).chunk({"x": 2}) - with raises_regex(TypeError, "arrays stored as dask"): + with raises_regex(ValueError, "dimension 'x'"): v.quantile(0.5, dim="x") @requires_dask From 0197bf2522e66bee7f9e35505929a46fdb6246ea Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Wed, 27 Nov 2019 20:39:06 +0100 Subject: [PATCH 03/28] Add pyXpcm to Related Projects doc page (#3578) --- doc/related-projects.rst | 1 + 1 
file changed, 1 insertion(+) diff --git a/doc/related-projects.rst b/doc/related-projects.rst index fd77ce56a0a..a8af05f3074 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -25,6 +25,7 @@ Geosciences - `PyGDX `_: Python 3 package for accessing data stored in GAMS Data eXchange (GDX) files. Also uses a custom subclass. +- `pyXpcm `_: xarray-based Profile Classification Modelling (PCM), mostly for ocean data. - `Regionmask `_: plotting and creation of masks of spatial regions - `rioxarray `_: geospatial xarray extension powered by rasterio - `salem `_: Adds geolocalised subsetting, masking, and plotting operations to xarray's data structures via accessors. From 4c26d6a639df1d08a3093cd2948a2e9a0107028b Mon Sep 17 00:00:00 2001 From: keewis Date: Wed, 27 Nov 2019 20:40:10 +0100 Subject: [PATCH 04/28] add cftime intersphinx entries (#3577) --- doc/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/conf.py b/doc/conf.py index b6edc07f612..65635353e93 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -351,4 +351,5 @@ "numba": ("https://numba.pydata.org/numba-doc/latest", None), "matplotlib": ("https://matplotlib.org", None), "dask": ("https://docs.dask.org/en/latest", None), + "cftime": ("https://unidata.github.io/cftime", None), } From be2ff9d2a756e50bcdf39484d527a8a91348621a Mon Sep 17 00:00:00 2001 From: keewis Date: Thu, 28 Nov 2019 04:58:06 +0100 Subject: [PATCH 05/28] Examples for quantile (#3576) * add examples for Dataset.quantile * add examples for DataArray.quantile * add examples for GroupBy.quantile * rename the example sections --- xarray/core/dataarray.py | 37 ++++++++++++++++++++++++++++ xarray/core/dataset.py | 42 +++++++++++++++++++++++++++++++ xarray/core/groupby.py | 53 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 132 insertions(+) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 1b135a350d1..64f21b0eb01 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2971,6 +2971,43 @@ def quantile( See Also -------- numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile + + Examples + -------- + + >>> da = xr.DataArray( + ... data=[[0.7, 4.2, 9.4, 1.5], [6.5, 7.3, 2.6, 1.9]], + ... coords={"x": [7, 9], "y": [1, 1.5, 2, 2.5]}, + ... dims=("x", "y"), + ... ) + + Single quantile + >>> da.quantile(0) # or da.quantile(0, dim=...) + + array(0.7) + Coordinates: + quantile float64 0.0 + >>> da.quantile(0, dim="x") + + array([0.7, 4.2, 2.6, 1.5]) + Coordinates: + * y (y) float64 1.0 1.5 2.0 2.5 + quantile float64 0.0 + + Multiple quantiles + >>> da.quantile([0, 0.5, 1]) + + array([0.7, 3.4, 9.4]) + Coordinates: + * quantile (quantile) float64 0.0 0.5 1.0 + >>> da.quantile([0, 0.5, 1], dim="x") + + array([[0.7 , 4.2 , 2.6 , 1.5 ], + [3.6 , 5.75, 6. , 1.7 ], + [6.5 , 7.3 , 9.4 , 1.9 ]]) + Coordinates: + * y (y) float64 1.0 1.5 2.0 2.5 + * quantile (quantile) float64 0.0 0.5 1.0 """ ds = self._to_temp_dataset().quantile( diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 1793dd2d94d..61dde6a393b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5116,6 +5116,48 @@ def quantile( See Also -------- numpy.nanpercentile, pandas.Series.quantile, DataArray.quantile + + Examples + -------- + + >>> ds = xr.Dataset( + ... {"a": (("x", "y"), [[0.7, 4.2, 9.4, 1.5], [6.5, 7.3, 2.6, 1.9]])}, + ... coords={"x": [7, 9], "y": [1, 1.5, 2, 2.5]}, + ... ) + + Single quantile + >>> ds.quantile(0) # or ds.quantile(0, dim=...) 
+ + Dimensions: () + Coordinates: + quantile float64 0.0 + Data variables: + a float64 0.7 + >>> ds.quantile(0, dim="x") + + Dimensions: (y: 4) + Coordinates: + * y (y) float64 1.0 1.5 2.0 2.5 + quantile float64 0.0 + Data variables: + a (y) float64 0.7 4.2 2.6 1.5 + + Multiple quantiles + >>> ds.quantile([0, 0.5, 1]) + + Dimensions: (quantile: 3) + Coordinates: + * quantile (quantile) float64 0.0 0.5 1.0 + Data variables: + a (quantile) float64 0.7 3.4 9.4 + >>> ds.quantile([0, 0.5, 1], dim="x") + + Dimensions: (quantile: 3, y: 4) + Coordinates: + * y (y) float64 1.0 1.5 2.0 2.5 + * quantile (quantile) float64 0.0 0.5 1.0 + Data variables: + a (quantile, y) float64 0.7 4.2 2.6 1.5 3.6 ... 1.7 6.5 7.3 9.4 1.9 """ if isinstance(dim, str): diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 7e872c74d72..cb8f6538820 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -597,6 +597,59 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): -------- numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile, DataArray.quantile + + Examples + -------- + + >>> da = xr.DataArray( + ... [[1.3, 8.4, 0.7, 6.9], [0.7, 4.2, 9.4, 1.5], [6.5, 7.3, 2.6, 1.9]], + ... coords={"x": [0, 0, 1], "y": [1, 1, 2, 2]}, + ... dims=("y", "y"), + ... ) + >>> ds = xr.Dataset({"a": da}) + + Single quantile + >>> da.groupby("x").quantile(0) + + array([[0.7, 4.2, 0.7, 1.5], + [6.5, 7.3, 2.6, 1.9]]) + Coordinates: + quantile float64 0.0 + * y (y) int64 1 1 2 2 + * x (x) int64 0 1 + >>> ds.groupby("y").quantile(0, dim=...) + + Dimensions: (y: 2) + Coordinates: + quantile float64 0.0 + * y (y) int64 1 2 + Data variables: + a (y) float64 0.7 0.7 + + Multiple quantiles + >>> da.groupby("x").quantile([0, 0.5, 1]) + + array([[[0.7 , 1. , 1.3 ], + [4.2 , 6.3 , 8.4 ], + [0.7 , 5.05, 9.4 ], + [1.5 , 4.2 , 6.9 ]], + + [[6.5 , 6.5 , 6.5 ], + [7.3 , 7.3 , 7.3 ], + [2.6 , 2.6 , 2.6 ], + [1.9 , 1.9 , 1.9 ]]]) + Coordinates: + * y (y) int64 1 1 2 2 + * quantile (quantile) float64 0.0 0.5 1.0 + * x (x) int64 0 1 + >>> ds.groupby("y").quantile([0, 0.5, 1], dim=...) + + Dimensions: (quantile: 3, y: 2) + Coordinates: + * quantile (quantile) float64 0.0 0.5 1.0 + * y (y) int64 1 2 + Data variables: + a (y, quantile) float64 0.7 5.35 8.4 0.7 2.25 9.4 """ if dim is None: dim = self._group_dim From 1e0f108f630e5c43de6c125c5d738a2a0f4a8282 Mon Sep 17 00:00:00 2001 From: keewis Date: Thu, 28 Nov 2019 18:19:04 +0100 Subject: [PATCH 06/28] update whats-new.rst (#3581) --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f811c7d15f0..884c3cef91c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -48,6 +48,9 @@ Documentation data. (:pull:`3199`) By `Zach Bruick ` and `Stephan Siemen ` +- Added examples for `DataArray.quantile`, `Dataset.quantile` and + `GroupBy.quantile`. (:pull:`3576`) + By `Justus Magin `_. Internal Changes ~~~~~~~~~~~~~~~~ From 69c85b85a1ef5d13b6ca51b6b655a1f719cc5abf Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sun, 1 Dec 2019 18:57:02 +0000 Subject: [PATCH 07/28] Add bottleneck & rasterio git tip to upstream-dev CI (#3585) * Add upstream bottleneck, netcdf4 & rasterio. 
* xfail rasterio test * remove netcdf4 --- ci/azure/install.yml | 4 +++- xarray/tests/test_backends.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ci/azure/install.yml b/ci/azure/install.yml index baa69bcc8d5..e4f3a0b9e16 100644 --- a/ci/azure/install.yml +++ b/ci/azure/install.yml @@ -25,7 +25,9 @@ steps: git+https://github.com/dask/dask \ git+https://github.com/dask/distributed \ git+https://github.com/zarr-developers/zarr \ - git+https://github.com/Unidata/cftime + git+https://github.com/Unidata/cftime \ + git+https://github.com/mapbox/rasterio \ + git+https://github.com/pydata/bottleneck condition: eq(variables['UPSTREAM_DEV'], 'true') displayName: Install upstream dev dependencies diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index de3a7eadab0..1e135ebd3e1 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3951,6 +3951,7 @@ def test_rasterio_environment(self): with xr.open_rasterio(tmp_file) as actual: assert_allclose(actual, expected) + @pytest.mark.xfail(reason="rasterio 1.1.1 is broken. GH3573") def test_rasterio_vrt(self): import rasterio From ed05f9862622b00f40f7b9b99ccdb0ab3766ff0f Mon Sep 17 00:00:00 2001 From: keewis Date: Tue, 3 Dec 2019 19:59:40 +0100 Subject: [PATCH 08/28] Resolve the version issues on RTD (#3589) * add the project root to PYTHONPATH and sys.path * don't install the package * update the requirements for the documentation environment so we definitely trigger the version collision. * comment on the reasons for the sys.path and os.environ modifications --- ci/requirements/doc.yml | 7 ++----- doc/conf.py | 6 ++++++ readthedocs.yml | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index f2c09ed6fef..97488e7f581 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -6,7 +6,7 @@ dependencies: - python=3.7 - bottleneck - cartopy - - eccodes + - cfgrib - h5netcdf - ipykernel - ipython @@ -21,8 +21,5 @@ dependencies: - seaborn - sphinx - sphinx_rtd_theme + - xarray - zarr - - pip - - pip: - - cfgrib - diff --git a/doc/conf.py b/doc/conf.py index 65635353e93..11abda6bb63 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -15,10 +15,16 @@ import datetime import os +import pathlib import subprocess import sys from contextlib import suppress +# make sure the source version is preferred (#3567) +root = pathlib.Path(__file__).absolute().parent.parent +os.environ["PYTHONPATH"] = str(root) +sys.path.insert(0, str(root)) + import xarray allowed_failures = set() diff --git a/readthedocs.yml b/readthedocs.yml index 6429780e7d7..c64fa1b7b02 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -4,5 +4,5 @@ conda: file: ci/requirements/doc.yml python: version: 3.7 - setup_py_install: true + setup_py_install: false formats: [] From 308bb37e91097d53101b46f1825f0e80a93258f1 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 4 Dec 2019 16:11:16 +0000 Subject: [PATCH 09/28] make coarsen reductions consistent with reductions on other classes (#3500) * Coarsen now has the same reduction methods as groupby & rolling. This brings in support for coarsen.count as well as passing skipna down to the other reduction functions. * test for count * Test that dims passed to coarsen are present in dataset. * Add whats-new * fix tests. * review comments. 
* Update doc/whats-new.rst Co-Authored-By: keewis * fix whats-new --- doc/whats-new.rst | 4 ++- xarray/core/nanops.py | 2 +- xarray/core/ops.py | 10 ------- xarray/core/rolling.py | 51 +++++++++++++++++++++++++---------- xarray/core/variable.py | 8 +++--- xarray/tests/test_dataset.py | 14 ++++++---- xarray/tests/test_variable.py | 20 ++++++++++++++ 7 files changed, 74 insertions(+), 35 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 884c3cef91c..ed1e79ce3d1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -28,7 +28,9 @@ New Features - :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile` and ``GroupBy.quantile`` now work with dask Variables. By `Deepak Cherian `_. - +- Added the :py:meth:`count` reduction method to both :py:class:`DatasetCoarsen` + and :py:class:`DataArrayCoarsen` objects. (:pull:`3500`) + By `Deepak Cherian `_ Bug fixes ~~~~~~~~~ diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index 17240faf007..f70e96217e8 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -25,7 +25,7 @@ def _maybe_null_out(result, axis, mask, min_count=1): """ if hasattr(axis, "__len__"): # if tuple or list raise ValueError( - "min_count is not available for reduction " "with more than one dimensions." + "min_count is not available for reduction with more than one dimensions." ) if axis is not None and getattr(result, "ndim", False): diff --git a/xarray/core/ops.py b/xarray/core/ops.py index 78c4466faed..b789f93b4f1 100644 --- a/xarray/core/ops.py +++ b/xarray/core/ops.py @@ -347,13 +347,3 @@ def inject_all_ops_and_reduce_methods(cls, priority=50, array_only=True): inject_reduce_methods(cls) inject_cum_methods(cls) - - -def inject_coarsen_methods(cls): - # standard numpy reduce methods - methods = [(name, getattr(duck_array_ops, name)) for name in NAN_REDUCE_METHODS] - for name, f in methods: - func = cls._reduce_method(f) - func.__name__ = name - func.__doc__ = _COARSEN_REDUCE_DOCSTRING_TEMPLATE.format(name=func.__name__) - setattr(cls, name, func) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index a1864332f4d..ea6d72b2e03 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -1,12 +1,12 @@ import functools import warnings -from typing import Callable +from typing import Any, Callable, Dict import numpy as np from . import dtypes, duck_array_ops, utils from .dask_array_ops import dask_rolling_wrapper -from .ops import inject_coarsen_methods +from .ops import inject_reduce_methods from .pycompat import dask_array_type try: @@ -542,6 +542,11 @@ def __init__(self, obj, windows, boundary, side, coord_func): self.side = side self.boundary = boundary + absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims] + if absent_dims: + raise ValueError( + f"Dimensions {absent_dims!r} not found in {self.obj.__class__.__name__}." + ) if not utils.is_dict_like(coord_func): coord_func = {d: coord_func for d in self.obj.dims} for c in self.obj.coords: @@ -565,18 +570,23 @@ def __repr__(self): class DataArrayCoarsen(Coarsen): __slots__ = () + _reduce_extra_args_docstring = """""" + @classmethod - def _reduce_method(cls, func): + def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool): """ - Return a wrapped function for injecting numpy methods. - see ops.inject_coarsen_methods + Return a wrapped function for injecting reduction methods. 
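+        The generated method also accepts a ``skipna`` keyword when
+        ``include_skipna`` is True, and forwards it (with any other kwargs)
+        to the underlying ``duck_array_ops`` reduction.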
+ see ops.inject_reduce_methods """ + kwargs: Dict[str, Any] = {} + if include_skipna: + kwargs["skipna"] = None def wrapped_func(self, **kwargs): from .dataarray import DataArray reduced = self.obj.variable.coarsen( - self.windows, func, self.boundary, self.side + self.windows, func, self.boundary, self.side, **kwargs ) coords = {} for c, v in self.obj.coords.items(): @@ -585,7 +595,11 @@ def wrapped_func(self, **kwargs): else: if any(d in self.windows for d in v.dims): coords[c] = v.variable.coarsen( - self.windows, self.coord_func[c], self.boundary, self.side + self.windows, + self.coord_func[c], + self.boundary, + self.side, + **kwargs, ) else: coords[c] = v @@ -597,12 +611,17 @@ def wrapped_func(self, **kwargs): class DatasetCoarsen(Coarsen): __slots__ = () + _reduce_extra_args_docstring = """""" + @classmethod - def _reduce_method(cls, func): + def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool): """ - Return a wrapped function for injecting numpy methods. - see ops.inject_coarsen_methods + Return a wrapped function for injecting reduction methods. + see ops.inject_reduce_methods """ + kwargs: Dict[str, Any] = {} + if include_skipna: + kwargs["skipna"] = None def wrapped_func(self, **kwargs): from .dataset import Dataset @@ -610,14 +629,18 @@ def wrapped_func(self, **kwargs): reduced = {} for key, da in self.obj.data_vars.items(): reduced[key] = da.variable.coarsen( - self.windows, func, self.boundary, self.side + self.windows, func, self.boundary, self.side, **kwargs ) coords = {} for c, v in self.obj.coords.items(): if any(d in self.windows for d in v.dims): coords[c] = v.variable.coarsen( - self.windows, self.coord_func[c], self.boundary, self.side + self.windows, + self.coord_func[c], + self.boundary, + self.side, + **kwargs, ) else: coords[c] = v.variable @@ -626,5 +649,5 @@ def wrapped_func(self, **kwargs): return wrapped_func -inject_coarsen_methods(DataArrayCoarsen) -inject_coarsen_methods(DatasetCoarsen) +inject_reduce_methods(DataArrayCoarsen) +inject_reduce_methods(DatasetCoarsen) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 041c303dd3a..773dcef0aa1 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1864,9 +1864,9 @@ def rolling_window( ), ) - def coarsen(self, windows, func, boundary="exact", side="left"): + def coarsen(self, windows, func, boundary="exact", side="left", **kwargs): """ - Apply + Apply reduction function. 
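+
+        ``windows`` maps dimension names to window sizes, ``func`` is a
+        callable or the name of a ``duck_array_ops`` reduction, and any
+        extra ``**kwargs`` (e.g. ``skipna``) are forwarded to that reduction.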
""" windows = {k: v for k, v in windows.items() if k in self.dims} if not windows: @@ -1878,11 +1878,11 @@ def coarsen(self, windows, func, boundary="exact", side="left"): func = getattr(duck_array_ops, name, None) if func is None: raise NameError(f"{name} is not a valid method.") - return type(self)(self.dims, func(reshaped, axis=axes), self._attrs) + return self._replace(data=func(reshaped, axis=axes, **kwargs)) def _coarsen_reshape(self, windows, boundary, side): """ - Construct a reshaped-array for corsen + Construct a reshaped-array for coarsen """ if not utils.is_dict_like(boundary): boundary = {d: boundary for d in windows.keys()} diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d8282f58051..7db1911621b 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -5497,6 +5497,11 @@ def ds(request): ) +def test_coarsen_absent_dims_error(ds): + with raises_regex(ValueError, "not found in Dataset."): + ds.coarsen(foo=2) + + @pytest.mark.parametrize("dask", [True, False]) @pytest.mark.parametrize(("boundary", "side"), [("trim", "left"), ("pad", "right")]) def test_coarsen(ds, dask, boundary, side): @@ -5505,12 +5510,11 @@ def test_coarsen(ds, dask, boundary, side): actual = ds.coarsen(time=2, x=3, boundary=boundary, side=side).max() assert_equal( - actual["z1"], ds["z1"].coarsen(time=2, x=3, boundary=boundary, side=side).max() + actual["z1"], ds["z1"].coarsen(x=3, boundary=boundary, side=side).max() ) # coordinate should be mean by default assert_equal( - actual["time"], - ds["time"].coarsen(time=2, x=3, boundary=boundary, side=side).mean(), + actual["time"], ds["time"].coarsen(time=2, boundary=boundary, side=side).mean() ) @@ -5521,8 +5525,8 @@ def test_coarsen_coords(ds, dask): # check if coord_func works actual = ds.coarsen(time=2, x=3, boundary="trim", coord_func={"time": "max"}).max() - assert_equal(actual["z1"], ds["z1"].coarsen(time=2, x=3, boundary="trim").max()) - assert_equal(actual["time"], ds["time"].coarsen(time=2, x=3, boundary="trim").max()) + assert_equal(actual["z1"], ds["z1"].coarsen(x=3, boundary="trim").max()) + assert_equal(actual["time"], ds["time"].coarsen(time=2, boundary="trim").max()) # raise if exact with pytest.raises(ValueError): diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 245dc1acc42..5b5aa1a523f 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1833,6 +1833,26 @@ def test_coarsen_2d(self): expected[1, 1] *= 12 / 11 assert_allclose(actual, expected) + v = self.cls(("x", "y"), np.arange(4 * 4, dtype=np.float32).reshape(4, 4)) + actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact") + expected = self.cls(("x", "y"), 4 * np.ones((2, 2))) + assert_equal(actual, expected) + + v[0, 0] = np.nan + v[-1, -1] = np.nan + expected[0, 0] = 3 + expected[-1, -1] = 3 + actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact") + assert_equal(actual, expected) + + actual = v.coarsen(dict(x=2, y=2), func="sum", boundary="exact", skipna=False) + expected = self.cls(("x", "y"), [[np.nan, 18], [42, np.nan]]) + assert_equal(actual, expected) + + actual = v.coarsen(dict(x=2, y=2), func="sum", boundary="exact", skipna=True) + expected = self.cls(("x", "y"), [[10, 18], [42, 35]]) + assert_equal(actual, expected) + @requires_dask class TestVariableWithDask(VariableSubclassobjects): From 577d3a75ea8bb25b99f9d31af8da14210cddff78 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 4 Dec 2019 16:45:12 +0000 Subject: [PATCH 10/28] fix plotting 
with transposed nondim coords. (#3441) * make plotting work with transposed nondim coords. * Additional test. * Test to make sure transpose is right * Undo the transpose change and add test to make sure transposition is right. * fix whats-new merge. * proper fix. * fix whats-new * Fix whats-new --- doc/whats-new.rst | 2 ++ xarray/plot/plot.py | 25 ++++++++++++++++++------- xarray/tests/test_plot.py | 29 +++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ed1e79ce3d1..8930947a2a6 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,6 +34,8 @@ New Features Bug fixes ~~~~~~~~~ +- Fix plotting with transposed 2D non-dimensional coordinates. (:issue:`3138`, :pull:`3441`) + By `Deepak Cherian `_. Documentation diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 16a4943627e..d38c9765352 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -672,10 +672,22 @@ def newplotfunc( # check if we need to broadcast one dimension if xval.ndim < yval.ndim: - xval = np.broadcast_to(xval, yval.shape) + dims = darray[ylab].dims + if xval.shape[0] == yval.shape[0]: + xval = np.broadcast_to(xval[:, np.newaxis], yval.shape) + else: + xval = np.broadcast_to(xval[np.newaxis, :], yval.shape) - if yval.ndim < xval.ndim: - yval = np.broadcast_to(yval, xval.shape) + elif yval.ndim < xval.ndim: + dims = darray[xlab].dims + if yval.shape[0] == xval.shape[0]: + yval = np.broadcast_to(yval[:, np.newaxis], xval.shape) + else: + yval = np.broadcast_to(yval[np.newaxis, :], xval.shape) + elif xval.ndim == 2: + dims = darray[xlab].dims + else: + dims = (darray[ylab].dims[0], darray[xlab].dims[0]) # May need to transpose for correct x, y labels # xlab may be the name of a coord, we have to check for dim names @@ -685,10 +697,9 @@ def newplotfunc( # we transpose to (y, x, color) to make this work. 
yx_dims = (ylab, xlab) dims = yx_dims + tuple(d for d in darray.dims if d not in yx_dims) - if dims != darray.dims: - darray = darray.transpose(*dims, transpose_coords=True) - elif darray[xlab].dims[-1] == darray.dims[0]: - darray = darray.transpose(transpose_coords=True) + + if dims != darray.dims: + darray = darray.transpose(*dims, transpose_coords=True) # Pass the data as a masked ndarray too zval = darray.to_masked_array(copy=False) diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 6e283ea01da..a10f0d9a67e 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -265,6 +265,7 @@ def test2d_1d_2d_coordinates_contourf(self): ) a.plot.contourf(x="time", y="depth") + a.plot.contourf(x="depth", y="time") def test3d(self): self.darray.plot() @@ -2149,3 +2150,31 @@ def test_yticks_kwarg(self, da): da.plot(yticks=np.arange(5)) expected = np.arange(5) assert np.all(plt.gca().get_yticks() == expected) + + +@requires_matplotlib +@pytest.mark.parametrize("plotfunc", ["pcolormesh", "contourf", "contour"]) +def test_plot_transposed_nondim_coord(plotfunc): + x = np.linspace(0, 10, 101) + h = np.linspace(3, 7, 101) + s = np.linspace(0, 1, 51) + z = s[:, np.newaxis] * h[np.newaxis, :] + da = xr.DataArray( + np.sin(x) * np.cos(z), + dims=["s", "x"], + coords={"x": x, "s": s, "z": (("s", "x"), z), "zt": (("x", "s"), z.T)}, + ) + getattr(da.plot, plotfunc)(x="x", y="zt") + getattr(da.plot, plotfunc)(x="zt", y="x") + + +@requires_matplotlib +@pytest.mark.parametrize("plotfunc", ["pcolormesh", "imshow"]) +def test_plot_transposes_properly(plotfunc): + # test that we aren't mistakenly transposing when the 2 dimensions have equal sizes. + da = xr.DataArray([np.sin(2 * np.pi / 10 * np.arange(10))] * 10, dims=("y", "x")) + hdl = getattr(da.plot, plotfunc)(x="x", y="y") + # get_array doesn't work for contour, contourf. It returns the colormap intervals. 
+ # pcolormesh returns 1D array but imshow returns a 2D array so it is necessary + # to ravel() on the LHS + assert np.all(hdl.get_array().ravel() == da.to_masked_array().ravel()) From cf17317f3570a2a1db2b2e665ea148f931b96c5c Mon Sep 17 00:00:00 2001 From: keewis Date: Thu, 5 Dec 2019 17:21:21 +0100 Subject: [PATCH 11/28] remove xarray again (#3591) --- ci/requirements/doc.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index 97488e7f581..a0c27a30b01 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -21,5 +21,4 @@ dependencies: - seaborn - sphinx - sphinx_rtd_theme - - xarray - zarr From 87a25b64898c94ea1e2a2e7a06d31ef602b116bf Mon Sep 17 00:00:00 2001 From: crusaderky Date: Thu, 5 Dec 2019 16:39:39 +0000 Subject: [PATCH 12/28] 2x~5x speed up for isel() in most cases (#3533) * Speed up isel in most cases * What's New * Trivial * Use _replace * isort * Code review * What's New * mypy --- doc/whats-new.rst | 8 +++--- xarray/coding/cftime_offsets.py | 2 +- xarray/core/dataarray.py | 26 ++++++++++++++++--- xarray/core/dataset.py | 45 ++++++++++++++++++++++++++++++++- xarray/core/formatting_html.py | 4 +-- xarray/core/indexing.py | 13 ++++++++++ xarray/core/variable.py | 5 +++- xarray/tests/test_dask.py | 2 +- xarray/tests/test_dataarray.py | 1 - xarray/tests/test_missing.py | 2 +- xarray/tests/test_variable.py | 22 ++++++++++++++++ 11 files changed, 116 insertions(+), 14 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8930947a2a6..d4d8ab8f3e5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -37,7 +37,6 @@ Bug fixes - Fix plotting with transposed 2D non-dimensional coordinates. (:issue:`3138`, :pull:`3441`) By `Deepak Cherian `_. - Documentation ~~~~~~~~~~~~~ - Switch doc examples to use nbsphinx and replace sphinx_gallery with @@ -58,8 +57,10 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ - - +- 2x to 5x speed boost (on small arrays) for :py:meth:`Dataset.isel`, + :py:meth:`DataArray.isel`, and :py:meth:`DataArray.__getitem__` when indexing by int, + slice, list of int, scalar ndarray, or 1-dimensional ndarray. + (:pull:`3533`) by `Guido Imperiale `_. - Removed internal method ``Dataset._from_vars_and_coord_names``, which was dominated by ``Dataset._construct_direct``. (:pull:`3565`) By `Maximilian Roos `_ @@ -190,6 +191,7 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ + - Added integration tests against `pint `_. (:pull:`3238`, :pull:`3447`, :pull:`3493`, :pull:`3508`) by `Justus Magin `_. 
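To make the whats-new entry above concrete, here is a minimal sketch of which
indexers take the new shortcut (the array and dimension names below are
illustrative only; just ``is_fancy_indexer`` and ``_isel_fancy`` come from
this patch):

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.arange(12).reshape(3, 4), dims=("x", "y"))

    # Basic indexers (int, slice, 1-D list of ints, 0-d/1-d ndarray)
    # are handled by the new fast path in isel():
    da.isel(x=0)
    da.isel(y=slice(2))
    da.isel(x=[0, 2])
    da.isel(x=np.array([0, 1]))

    # A multi-dimensional (vectorized) indexer is "fancy" and still goes
    # through the general _isel_fancy machinery:
    da.isel(x=xr.DataArray([[0, 1], [1, 2]], dims=("a", "b")))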
diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 8471ed1a558..eeb68508527 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -42,6 +42,7 @@ import re from datetime import timedelta +from distutils.version import LooseVersion from functools import partial from typing import ClassVar, Optional @@ -50,7 +51,6 @@ from ..core.pdcompat import count_not_none from .cftimeindex import CFTimeIndex, _parse_iso8601_with_reso from .times import format_cftime_datetime -from distutils.version import LooseVersion def get_date_type(calendar): diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 64f21b0eb01..20de0cffbc2 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -50,7 +50,8 @@ ) from .dataset import Dataset, split_indexes from .formatting import format_item -from .indexes import Indexes, propagate_indexes, default_indexes +from .indexes import Indexes, default_indexes, propagate_indexes +from .indexing import is_fancy_indexer from .merge import PANDAS_TYPES, _extract_indexes_from_coords from .options import OPTIONS from .utils import Default, ReprObject, _check_inplace, _default, either_dict_or_kwargs @@ -1027,8 +1028,27 @@ def isel( DataArray.sel """ indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") - ds = self._to_temp_dataset().isel(drop=drop, indexers=indexers) - return self._from_temp_dataset(ds) + if any(is_fancy_indexer(idx) for idx in indexers.values()): + ds = self._to_temp_dataset()._isel_fancy(indexers, drop=drop) + return self._from_temp_dataset(ds) + + # Much faster algorithm for when all indexers are ints, slices, one-dimensional + # lists, or zero or one-dimensional np.ndarray's + + variable = self._variable.isel(indexers) + + coords = {} + for coord_name, coord_value in self._coords.items(): + coord_indexers = { + k: v for k, v in indexers.items() if k in coord_value.dims + } + if coord_indexers: + coord_value = coord_value.isel(coord_indexers) + if drop and coord_value.ndim == 0: + continue + coords[coord_name] = coord_value + + return self._replace(variable=variable, coords=coords) def sel( self, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 61dde6a393b..5926fd4ff36 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -66,6 +66,7 @@ propagate_indexes, roll_index, ) +from .indexing import is_fancy_indexer from .merge import ( dataset_merge_method, dataset_update_method, @@ -78,8 +79,8 @@ Default, Frozen, SortedKeysDict, - _default, _check_inplace, + _default, decode_numpy_dict_values, either_dict_or_kwargs, hashable, @@ -1907,6 +1908,48 @@ def isel( DataArray.isel """ indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") + if any(is_fancy_indexer(idx) for idx in indexers.values()): + return self._isel_fancy(indexers, drop=drop) + + # Much faster algorithm for when all indexers are ints, slices, one-dimensional + # lists, or zero or one-dimensional np.ndarray's + invalid = indexers.keys() - self.dims.keys() + if invalid: + raise ValueError("dimensions %r do not exist" % invalid) + + variables = {} + dims: Dict[Hashable, Tuple[int, ...]] = {} + coord_names = self._coord_names.copy() + indexes = self._indexes.copy() if self._indexes is not None else None + + for var_name, var_value in self._variables.items(): + var_indexers = {k: v for k, v in indexers.items() if k in var_value.dims} + if var_indexers: + var_value = var_value.isel(var_indexers) + if drop and var_value.ndim == 0 and var_name in coord_names: + 
coord_names.remove(var_name) + if indexes: + indexes.pop(var_name, None) + continue + if indexes and var_name in indexes: + if var_value.ndim == 1: + indexes[var_name] = var_value.to_index() + else: + del indexes[var_name] + variables[var_name] = var_value + dims.update(zip(var_value.dims, var_value.shape)) + + return self._construct_direct( + variables=variables, + coord_names=coord_names, + dims=dims, + attrs=self._attrs, + indexes=indexes, + encoding=self._encoding, + file_obj=self._file_obj, + ) + + def _isel_fancy(self, indexers: Mapping[Hashable, Any], *, drop: bool) -> "Dataset": # Note: we need to preserve the original indexers variable in order to merge the # coords below indexers_list = list(self._validate_indexers(indexers)) diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index dbebbcf4fbe..8ceda8bfbfa 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -1,11 +1,11 @@ import uuid -import pkg_resources from collections import OrderedDict from functools import partial from html import escape -from .formatting import inline_variable_array_repr, short_data_repr +import pkg_resources +from .formatting import inline_variable_array_repr, short_data_repr CSS_FILE_PATH = "/".join(("static", "css", "style.css")) CSS_STYLE = pkg_resources.resource_string("xarray", CSS_FILE_PATH).decode("utf8") diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index f48c9e72af1..8e851b39c3e 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1213,6 +1213,19 @@ def posify_mask_indexer(indexer): return type(indexer)(key) +def is_fancy_indexer(indexer: Any) -> bool: + """Return False if indexer is a int, slice, a 1-dimensional list, or a 0 or + 1-dimensional ndarray; in all other cases return True + """ + if isinstance(indexer, (int, slice)): + return False + if isinstance(indexer, np.ndarray): + return indexer.ndim > 1 + if isinstance(indexer, list): + return bool(indexer) and not isinstance(indexer[0], int) + return True + + class NumpyIndexingAdapter(ExplicitlyIndexedNDArrayMixin): """Wrap a NumPy array to use explicit indexing.""" diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 773dcef0aa1..aa04cffb5ea 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -617,7 +617,10 @@ def _broadcast_indexes_outer(self, key): k = k.data if not isinstance(k, BASIC_INDEXING_TYPES): k = np.asarray(k) - if k.dtype.kind == "b": + if k.size == 0: + # Slice by empty list; numpy could not infer the dtype + k = k.astype(int) + elif k.dtype.kind == "b": (k,) = np.nonzero(k) new_key.append(k) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 4c1f317342f..f3b10e3370c 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -16,6 +16,7 @@ from xarray.testing import assert_chunks_equal from xarray.tests import mock +from ..core.duck_array_ops import lazy_array_equiv from . 
import ( assert_allclose, assert_array_equal, @@ -25,7 +26,6 @@ raises_regex, requires_scipy_or_netCDF4, ) -from ..core.duck_array_ops import lazy_array_equiv from .test_backends import create_tmp_file dask = pytest.importorskip("dask") diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index a1e34abd0d5..f957316d8ac 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -16,7 +16,6 @@ from xarray.core.common import full_like from xarray.core.indexes import propagate_indexes from xarray.core.utils import is_scalar - from xarray.tests import ( LooseVersion, ReturnItem, diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 0b410383a34..1cd0319a9a5 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -9,8 +9,8 @@ NumpyInterpolator, ScipyInterpolator, SplineInterpolator, - get_clean_interp_index, _get_nan_block_lengths, + get_clean_interp_index, ) from xarray.core.pycompat import dask_array_type from xarray.tests import ( diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 5b5aa1a523f..1d83e16a5bd 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1156,6 +1156,26 @@ def test_items(self): def test_getitem_basic(self): v = self.cls(["x", "y"], [[0, 1, 2], [3, 4, 5]]) + # int argument + v_new = v[0] + assert v_new.dims == ("y",) + assert_array_equal(v_new, v._data[0]) + + # slice argument + v_new = v[:2] + assert v_new.dims == ("x", "y") + assert_array_equal(v_new, v._data[:2]) + + # list arguments + v_new = v[[0]] + assert v_new.dims == ("x", "y") + assert_array_equal(v_new, v._data[[0]]) + + v_new = v[[]] + assert v_new.dims == ("x", "y") + assert_array_equal(v_new, v._data[[]]) + + # dict arguments v_new = v[dict(x=0)] assert v_new.dims == ("y",) assert_array_equal(v_new, v._data[0]) @@ -1196,6 +1216,8 @@ def test_isel(self): assert_identical(v.isel(time=0), v[0]) assert_identical(v.isel(time=slice(0, 3)), v[:3]) assert_identical(v.isel(x=0), v[:, 0]) + assert_identical(v.isel(x=[0, 2]), v[:, [0, 2]]) + assert_identical(v.isel(time=[]), v[[]]) with raises_regex(ValueError, "do not exist"): v.isel(not_a_dim=0) From 4c51aa2c2ed953a59ab2c8ba5e127cd988ba67dd Mon Sep 17 00:00:00 2001 From: keewis Date: Fri, 6 Dec 2019 16:37:46 +0100 Subject: [PATCH 13/28] Silence sphinx warnings: Round 2 (#3592) * remove the sections from the quantile examples sphinx' doctest extension does not work too well with them. * remove the attributes section from DataArray's docstring * fix author error issues * remove the newline in the GroupBy.quantile examples this will probably make doctest fail but it fixes the sphinx warnings. * fix a few more links * CI: disable nit-picky warnings and fail on new warnings --- azure-pipelines.yml | 2 +- doc/whats-new.rst | 14 +++++++------- xarray/core/dataarray.py | 17 ----------------- xarray/core/dataset.py | 4 ---- xarray/core/groupby.py | 5 ----- 5 files changed, 8 insertions(+), 34 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 90de0705a27..d6ee76c7d3f 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -110,5 +110,5 @@ jobs: - bash: | source activate xarray-tests cd doc - sphinx-build -n -j auto -b html -d _build/doctrees . _build/html + sphinx-build -W --keep-going -j auto -b html -d _build/doctrees . 
_build/html displayName: Build HTML docs diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d4d8ab8f3e5..96e5eeacf95 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -30,7 +30,7 @@ New Features By `Deepak Cherian `_. - Added the :py:meth:`count` reduction method to both :py:class:`DatasetCoarsen` and :py:class:`DataArrayCoarsen` objects. (:pull:`3500`) - By `Deepak Cherian `_ + By `Deepak Cherian `_ Bug fixes ~~~~~~~~~ @@ -42,17 +42,17 @@ Documentation - Switch doc examples to use nbsphinx and replace sphinx_gallery with notebook. (:pull:`3105`, :pull:`3106`, :pull:`3121`) - By `Ryan Abernathey ` + By `Ryan Abernathey `_ - Added example notebook demonstrating use of xarray with Regional Ocean Modeling System (ROMS) ocean hydrodynamic model output. (:pull:`3116`). - By `Robert Hetland ` + By `Robert Hetland `_ - Added example notebook demonstrating the visualization of ERA5 GRIB data. (:pull:`3199`) - By `Zach Bruick ` and - `Stephan Siemen ` -- Added examples for `DataArray.quantile`, `Dataset.quantile` and - `GroupBy.quantile`. (:pull:`3576`) + By `Zach Bruick `_ and + `Stephan Siemen `_ +- Added examples for :py:meth:`DataArray.quantile`, :py:meth:`Dataset.quantile` and + ``GroupBy.quantile``. (:pull:`3576`) By `Justus Magin `_. Internal Changes diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 20de0cffbc2..b649df6dd56 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -235,19 +235,6 @@ class DataArray(AbstractArray, DataWithCoords): Getting items from or doing mathematical operations with a DataArray always returns another DataArray. - - Attributes - ---------- - dims : tuple - Dimension names associated with this array. - values : numpy.ndarray - Access or modify DataArray values as a numpy array. - coords : dict-like - Dictionary of DataArray objects that label values along each dimension. - name : str or None - Name of this array. - attrs : dict - Dictionary for holding arbitrary metadata. """ _cache: Dict[str, Any] @@ -3000,8 +2987,6 @@ def quantile( ... coords={"x": [7, 9], "y": [1, 1.5, 2, 2.5]}, ... dims=("x", "y"), ... ) - - Single quantile >>> da.quantile(0) # or da.quantile(0, dim=...) array(0.7) @@ -3013,8 +2998,6 @@ def quantile( Coordinates: * y (y) float64 1.0 1.5 2.0 2.5 quantile float64 0.0 - - Multiple quantiles >>> da.quantile([0, 0.5, 1]) array([0.7, 3.4, 9.4]) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 5926fd4ff36..cc821becd6f 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5167,8 +5167,6 @@ def quantile( ... {"a": (("x", "y"), [[0.7, 4.2, 9.4, 1.5], [6.5, 7.3, 2.6, 1.9]])}, ... coords={"x": [7, 9], "y": [1, 1.5, 2, 2.5]}, ... ) - - Single quantile >>> ds.quantile(0) # or ds.quantile(0, dim=...) Dimensions: () @@ -5184,8 +5182,6 @@ def quantile( quantile float64 0.0 Data variables: a (y) float64 0.7 4.2 2.6 1.5 - - Multiple quantiles >>> ds.quantile([0, 0.5, 1]) Dimensions: (quantile: 3) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index cb8f6538820..5b52f48413d 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -607,8 +607,6 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): ... dims=("y", "y"), ... 
) >>> ds = xr.Dataset({"a": da}) - - Single quantile >>> da.groupby("x").quantile(0) array([[0.7, 4.2, 0.7, 1.5], @@ -625,15 +623,12 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): * y (y) int64 1 2 Data variables: a (y) float64 0.7 0.7 - - Multiple quantiles >>> da.groupby("x").quantile([0, 0.5, 1]) array([[[0.7 , 1. , 1.3 ], [4.2 , 6.3 , 8.4 ], [0.7 , 5.05, 9.4 ], [1.5 , 4.2 , 6.9 ]], - [[6.5 , 6.5 , 6.5 ], [7.3 , 7.3 , 7.3 ], [2.6 , 2.6 , 2.6 ], From cafcaeea897894e3a2f44a38bd33c50a48c86215 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 6 Dec 2019 22:30:18 -0600 Subject: [PATCH 14/28] Fix map_blocks HLG layering (#3598) * Fix map_blocks HLG layering This fixes an issue with the HighLevelGraph noted in https://github.com/pydata/xarray/pull/3584, and exposed by a recent change in Dask to do more HLG fusion. * update * black * update --- doc/whats-new.rst | 2 ++ xarray/core/parallel.py | 24 +++++++++++++++++++++--- xarray/tests/test_dask.py | 13 +++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 96e5eeacf95..554f0bc4695 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -36,6 +36,8 @@ Bug fixes ~~~~~~~~~ - Fix plotting with transposed 2D non-dimensional coordinates. (:issue:`3138`, :pull:`3441`) By `Deepak Cherian `_. +- Fix issue with Dask-backed datasets raising a ``KeyError`` on some computations involving ``map_blocks`` (:pull:`3598`) + By `Tom Augspurger `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index fbb5ef94ca2..dd6c67338d8 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -7,12 +7,14 @@ except ImportError: pass +import collections import itertools import operator from typing import ( Any, Callable, Dict, + DefaultDict, Hashable, Mapping, Sequence, @@ -221,7 +223,12 @@ def _wrapper(func, obj, to_array, args, kwargs): indexes = {dim: dataset.indexes[dim] for dim in preserved_indexes} indexes.update({k: template.indexes[k] for k in new_indexes}) + # We're building a new HighLevelGraph hlg. We'll have one new layer + # for each variable in the dataset, which is the result of the + # func applied to the values. + graph: Dict[Any, Any] = {} + new_layers: DefaultDict[str, Dict[Any, Any]] = collections.defaultdict(dict) gname = "{}-{}".format( dask.utils.funcname(func), dask.base.tokenize(dataset, args, kwargs) ) @@ -310,9 +317,20 @@ def _wrapper(func, obj, to_array, args, kwargs): # unchunked dimensions in the input have one chunk in the result key += (0,) - graph[key] = (operator.getitem, from_wrapper, name) + # We're adding multiple new layers to the graph: + # The first new layer is the result of the computation on + # the array. + # Then we add one layer per variable, which extracts the + # result for that variable, and depends on just the first new + # layer. + new_layers[gname_l][key] = (operator.getitem, from_wrapper, name) + + hlg = HighLevelGraph.from_collections(gname, graph, dependencies=[dataset]) - graph = HighLevelGraph.from_collections(gname, graph, dependencies=[dataset]) + for gname_l, layer in new_layers.items(): + # This adds in the getitems for each variable in the dataset. 
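+        # Each of these layers just maps chunk keys to ``getitem`` calls on
+        # the corresponding ``_wrapper`` output, so a variable's layer
+        # depends only on the single computation layer (``gname``) above.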
+ hlg.dependencies[gname_l] = {gname} + hlg.layers[gname_l] = layer result = Dataset(coords=indexes, attrs=template.attrs) for name, gname_l in var_key_map.items(): @@ -325,7 +343,7 @@ def _wrapper(func, obj, to_array, args, kwargs): var_chunks.append((len(indexes[dim]),)) data = dask.array.Array( - graph, name=gname_l, chunks=var_chunks, dtype=template[name].dtype + hlg, name=gname_l, chunks=var_chunks, dtype=template[name].dtype ) result[name] = (dims, data, template[name].attrs) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index f3b10e3370c..6122e987154 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1189,6 +1189,19 @@ def func(obj): assert_identical(expected.compute(), actual.compute()) +def test_map_blocks_hlg_layers(): + # regression test for #3599 + ds = xr.Dataset( + { + "x": (("a",), dask.array.ones(10, chunks=(5,))), + "z": (("b",), dask.array.ones(10, chunks=(5,))), + } + ) + mapped = ds.map_blocks(lambda x: x) + + xr.testing.assert_equal(mapped, ds) + + def test_make_meta(map_ds): from ..core.parallel import make_meta From 1c446d374e81afcd174a6a2badda9121d2d776c0 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 7 Dec 2019 15:37:59 -0500 Subject: [PATCH 15/28] Minor fix to combine_by_coords to allow for the combination of CFTimeIndexes separated by large time intervals (#3543) * Select first element of each index * black * Add comment to test * Move what's new entry to unreleased section * Fix link in what's new * Remove newline after what's new entry --- doc/whats-new.rst | 3 +++ xarray/core/combine.py | 2 +- xarray/tests/test_combine.py | 24 +++++++++++++++++++++++- 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 554f0bc4695..0a3406c3ebe 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,6 +34,9 @@ New Features Bug fixes ~~~~~~~~~ +- Fix :py:meth:`xarray.combine_by_coords` when combining cftime coordinates + which span long time intervals (:issue:`3535`). By `Spencer Clark + `_. - Fix plotting with transposed 2D non-dimensional coordinates. (:issue:`3138`, :pull:`3441`) By `Deepak Cherian `_. - Fix issue with Dask-backed datasets raising a ``KeyError`` on some computations involving ``map_blocks`` (:pull:`3598`) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index b9db30a9f92..65087b05cc0 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -88,7 +88,7 @@ def _infer_concat_order_from_coords(datasets): # with the same value have the same coord values throughout. if any(index.size == 0 for index in indexes): raise ValueError("Cannot handle size zero dimensions") - first_items = pd.Index([index.take([0]) for index in indexes]) + first_items = pd.Index([index[0] for index in indexes]) # Sort datasets along dim # We want rank but with identical elements given identical diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index cd26e7fb60b..a29fe0190cf 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -22,7 +22,7 @@ _new_tile_id, ) -from . import assert_equal, assert_identical, raises_regex +from . 
import assert_equal, assert_identical, raises_regex, requires_cftime from .test_dataset import create_test_data @@ -877,3 +877,25 @@ def test_auto_combine_without_coords(self): objs = [Dataset({"foo": ("x", [0])}), Dataset({"foo": ("x", [1])})] with pytest.warns(FutureWarning, match="supplied do not have global"): auto_combine(objs) + + +@requires_cftime +def test_combine_by_coords_distant_cftime_dates(): + # Regression test for https://github.com/pydata/xarray/issues/3535 + import cftime + + time_1 = [cftime.DatetimeGregorian(4500, 12, 31)] + time_2 = [cftime.DatetimeGregorian(4600, 12, 31)] + time_3 = [cftime.DatetimeGregorian(5100, 12, 31)] + + da_1 = DataArray([0], dims=["time"], coords=[time_1], name="a").to_dataset() + da_2 = DataArray([1], dims=["time"], coords=[time_2], name="a").to_dataset() + da_3 = DataArray([2], dims=["time"], coords=[time_3], name="a").to_dataset() + + result = combine_by_coords([da_1, da_2, da_3]) + + expected_time = np.concatenate([time_1, time_2, time_3]) + expected = DataArray( + [0, 1, 2], dims=["time"], coords=[expected_time], name="a" + ).to_dataset() + assert_identical(result, expected) From 5c674e6212ed5a5daeb3f289f023d6abc48417ae Mon Sep 17 00:00:00 2001 From: keewis Date: Mon, 9 Dec 2019 11:25:51 +0100 Subject: [PATCH 16/28] Fix pint integration tests (#3600) * don't use the index to construct the condition array * use drop_vars to drop variables and coordinates * update the xfail reason for astype and item * remove the xfails due to np.result_type not being implemented * make array_extract_units a bit more robust * add the missing dataset apply_ufunc test * use None as the dict key for quantities * convert to variable to avoid the merge / assign_coords issue relevant issue: #3483 * update the align tests * fix the bugs introduced by converting to variables * update a few more tests * update the aggregation tests * update a few more tests * don't test rank which was deprecated in numpy 1.9 * update the DataArray.fillna tests * update most of the remaining DataArray tests * remove a debug assert * fix the broadcast_equals tests * update the indexing tests * update the tests depending on einsum * update the squeeze tests * fix a bug in the head / tail / thin test design * use dictionaries to index in the loc tests * update the expected unit of the computation tests * update the grouped operations tests * update the where tests * update most of the remaining dataset tests * create new tests for drop_sel * final batch of updated tests * rename result to actual to match the other test files * fix some more test bugs * update the xfail marks --- xarray/tests/test_units.py | 1998 ++++++++++++++++++------------------ 1 file changed, 1019 insertions(+), 979 deletions(-) diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 0be6f8af464..f8a8a259c1f 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -29,9 +29,11 @@ def array_extract_units(obj): - raw = obj.data if hasattr(obj, "data") else obj + if isinstance(obj, (xr.Variable, xr.DataArray, xr.Dataset)): + obj = obj.data + try: - return raw.units + return obj.units except AttributeError: return None @@ -112,7 +114,7 @@ def extract_units(obj): units = {**vars_units, **coords_units} elif isinstance(obj, Quantity): - vars_units = {"": array_extract_units(obj)} + vars_units = {None: array_extract_units(obj)} units = {**vars_units} else: @@ -203,21 +205,25 @@ def attach_units(obj, units): def convert_units(obj, to): if isinstance(obj, xr.Dataset): data_vars = { - name: 
convert_units(array, to) for name, array in obj.data_vars.items() + name: convert_units(array.variable, {None: to.get(name)}) + for name, array in obj.data_vars.items() + } + coords = { + name: convert_units(array.variable, {None: to.get(name)}) + for name, array in obj.coords.items() } - coords = {name: convert_units(array, to) for name, array in obj.coords.items()} new_obj = xr.Dataset(data_vars=data_vars, coords=coords, attrs=obj.attrs) elif isinstance(obj, xr.DataArray): name = obj.name new_units = ( - to.get(name, None) or to.get("data", None) or to.get(None, None) or 1 + to.get(name, None) or to.get("data", None) or to.get(None, None) or None ) - data = convert_units(obj.data, {None: new_units}) + data = convert_units(obj.variable, {None: new_units}) coords = { - name: (array.dims, convert_units(array.data, to)) + name: (array.dims, convert_units(array.variable, {None: to.get(name)})) for name, array in obj.coords.items() if name != obj.name } @@ -225,6 +231,9 @@ def convert_units(obj, to): new_obj = xr.DataArray( name=name, data=data, coords=coords, attrs=obj.attrs, dims=obj.dims ) + elif isinstance(obj, xr.Variable): + new_data = convert_units(obj.data, to) + new_obj = obj.copy(data=new_data) elif isinstance(obj, unit_registry.Quantity): units = to.get(None) new_obj = obj.to(units) if units is not None else obj @@ -344,14 +353,34 @@ def test_apply_ufunc_dataarray(dtype): data_array = xr.DataArray(data=array, dims="x", coords={"x": x}) expected = attach_units(func(strip_units(data_array)), extract_units(data_array)) - result = func(data_array) + actual = func(data_array) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) -@pytest.mark.xfail( - reason="pint does not implement `np.result_type` and align strips units" -) +def test_apply_ufunc_dataset(dtype): + func = function( + xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1} + ) + + array1 = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m + array2 = np.linspace(0, 10, 5).astype(dtype) * unit_registry.m + + x = np.arange(5) * unit_registry.s + y = np.arange(10) * unit_registry.m + + ds = xr.Dataset( + data_vars={"a": (("x", "y"), array1), "b": ("x", array2)}, + coords={"x": x, "y": y}, + ) + + expected = attach_units(func(strip_units(ds)), extract_units(ds)) + actual = func(ds) + + assert_equal_with_units(expected, actual) + + +@pytest.mark.xfail(reason="blocked by `reindex` / `where`") @pytest.mark.parametrize( "unit,error", ( @@ -378,9 +407,9 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype): original_unit = unit_registry.m variants = { - "data": (unit, original_unit, original_unit), - "dims": (original_unit, unit, original_unit), - "coords": (original_unit, original_unit, unit), + "data": (unit, 1, 1), + "dims": (original_unit, unit, 1), + "coords": (original_unit, 1, unit), } data_unit, dim_unit, coord_unit = variants.get(variant) @@ -410,32 +439,27 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype): stripped_kwargs = { key: strip_units( - convert_units(value, {None: original_unit}) - if isinstance(value, unit_registry.Quantity) - else value + convert_units(value, {None: original_unit if data_unit != 1 else None}) ) for key, value in func.kwargs.items() } - units = extract_units(data_array1) - # FIXME: should the expected_b have the same units as data_array1 - # or data_array2? 
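
A minimal standalone sketch of the unit round-trip that the replacement code
below performs (not from the patch; assumes a pint.UnitRegistry named ureg and
illustrative values): both operands are computed in the units of the first
argument, and the second result is then converted back to its own units.

    import pint

    ureg = pint.UnitRegistry()

    a = [0.0, 1.0] * ureg.m
    b = [0.0, 100.0] * ureg.cm

    # work in a's units ...
    b_in_a = b.to(a.units)                 # -> [0.0, 1.0] meter
    # ... then restore b's original units for the second result
    expected_b = (b_in_a + a).to(b.units)  # -> [0.0, 200.0] centimeter
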
- expected_a, expected_b = tuple( - attach_units(elem, units) - for elem in func( - strip_units(data_array1), - strip_units(convert_units(data_array2, units)), - **stripped_kwargs, - ) + units_a = extract_units(data_array1) + units_b = extract_units(data_array2) + expected_a, expected_b = func( + strip_units(data_array1), + strip_units(convert_units(data_array2, units_a)), + **stripped_kwargs, ) - result_a, result_b = func(data_array1, data_array2) + expected_a = attach_units(expected_a, units_a) + expected_b = convert_units(attach_units(expected_b, units_a), units_b) - assert_equal_with_units(expected_a, result_a) - assert_equal_with_units(expected_b, result_b) + actual_a, actual_b = func(data_array1, data_array2) + assert_equal_with_units(expected_a, actual_a) + assert_equal_with_units(expected_b, actual_b) -@pytest.mark.xfail( - reason="pint does not implement `np.result_type` and align strips units" -) + +@pytest.mark.xfail(reason="blocked by `reindex` / `where`") @pytest.mark.parametrize( "unit,error", ( @@ -461,11 +485,7 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype): def test_align_dataset(fill_value, unit, variant, error, dtype): original_unit = unit_registry.m - variants = { - "data": (unit, original_unit, original_unit), - "dims": (original_unit, unit, original_unit), - "coords": (original_unit, original_unit, unit), - } + variants = {"data": (unit, 1, 1), "dims": (1, unit, 1), "coords": (1, 1, unit)} data_unit, dim_unit, coord_unit = variants.get(variant) array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit @@ -497,24 +517,22 @@ def test_align_dataset(fill_value, unit, variant, error, dtype): stripped_kwargs = { key: strip_units( - convert_units(value, {None: original_unit}) - if isinstance(value, unit_registry.Quantity) - else value + convert_units(value, {None: original_unit if data_unit != 1 else None}) ) for key, value in func.kwargs.items() } - units = extract_units(ds1) - # FIXME: should the expected_b have the same units as ds1 or ds2? 
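
The dataset variant below resolves its FIXME the same way. For orientation, the
units mappings passed between extract_units, attach_units and convert_units key
data variables and coordinates by name, with None standing in for an unnamed
array (this patch switches that key from "" to None). A hypothetical
illustration of the convention, assuming a pint.UnitRegistry named ureg:

    import pint

    ureg = pint.UnitRegistry()

    # Dataset / DataArray units are keyed by variable or coordinate name
    ds_units = {"a": ureg.m, "x": ureg.s, "y": None}  # None: no units attached
    # a bare Quantity (or the array's own data) is keyed by None
    quantity_units = {None: ureg.degK}
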
- expected_a, expected_b = tuple( - attach_units(elem, units) - for elem in func( - strip_units(ds1), strip_units(convert_units(ds2, units)), **stripped_kwargs - ) + units_a = extract_units(ds1) + units_b = extract_units(ds2) + expected_a, expected_b = func( + strip_units(ds1), strip_units(convert_units(ds2, units_a)), **stripped_kwargs ) - result_a, result_b = func(ds1, ds2) + expected_a = attach_units(expected_a, units_a) + expected_b = convert_units(attach_units(expected_b, units_a), units_b) - assert_equal_with_units(expected_a, result_a) - assert_equal_with_units(expected_b, result_b) + actual_a, actual_b = func(ds1, ds2) + + assert_equal_with_units(expected_a, actual_a) + assert_equal_with_units(expected_b, actual_b) def test_broadcast_dataarray(dtype): @@ -528,10 +546,10 @@ def test_broadcast_dataarray(dtype): attach_units(elem, extract_units(a)) for elem in xr.broadcast(strip_units(a), strip_units(b)) ) - result_a, result_b = xr.broadcast(a, b) + actual_a, actual_b = xr.broadcast(a, b) - assert_equal_with_units(expected_a, result_a) - assert_equal_with_units(expected_b, result_b) + assert_equal_with_units(expected_a, actual_a) + assert_equal_with_units(expected_b, actual_b) def test_broadcast_dataset(dtype): @@ -543,12 +561,11 @@ def test_broadcast_dataset(dtype): (expected,) = tuple( attach_units(elem, extract_units(ds)) for elem in xr.broadcast(strip_units(ds)) ) - (result,) = xr.broadcast(ds) + (actual,) = xr.broadcast(ds) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) -@pytest.mark.xfail(reason="`combine_by_coords` strips units") @pytest.mark.parametrize( "unit,error", ( @@ -614,12 +631,11 @@ def test_combine_by_coords(variant, unit, error, dtype): ), units, ) - result = xr.combine_by_coords([ds, other]) + actual = xr.combine_by_coords([ds, other]) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) -@pytest.mark.xfail(reason="blocked by `where`") @pytest.mark.parametrize( "unit,error", ( @@ -628,7 +644,12 @@ def test_combine_by_coords(variant, unit, error, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param( + unit_registry.mm, + None, + id="compatible_unit", + marks=pytest.mark.xfail(reason="wrong order of arguments to `where`"), + ), pytest.param(unit_registry.m, None, id="identical_unit"), ), ids=repr, @@ -714,12 +735,11 @@ def test_combine_nested(variant, unit, error, dtype): ), units, ) - result = func([[ds1, ds2], [ds3, ds4]]) + actual = func([[ds1, ds2], [ds3, ds4]]) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) -@pytest.mark.xfail(reason="`concat` strips units") @pytest.mark.parametrize( "unit,error", ( @@ -760,15 +780,18 @@ def test_concat_dataarray(variant, unit, error, dtype): return + units = extract_units(arr1) expected = attach_units( - xr.concat([strip_units(arr1), strip_units(arr2)], dim="x"), extract_units(arr1) + xr.concat( + [strip_units(arr1), strip_units(convert_units(arr2, units))], dim="x" + ), + units, ) - result = xr.concat([arr1, arr2], dim="x") + actual = xr.concat([arr1, arr2], dim="x") - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) -@pytest.mark.xfail(reason="`concat` strips units") @pytest.mark.parametrize( "unit,error", ( @@ -809,15 +832,17 @@ def test_concat_dataset(variant, unit, error, dtype): return + units 
= extract_units(ds1) expected = attach_units( - xr.concat([strip_units(ds1), strip_units(ds2)], dim="x"), extract_units(ds1) + xr.concat([strip_units(ds1), strip_units(convert_units(ds2, units))], dim="x"), + units, ) - result = xr.concat([ds1, ds2], dim="x") + actual = xr.concat([ds1, ds2], dim="x") - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) -@pytest.mark.xfail(reason="blocked by `where`") +@pytest.mark.xfail(reason="blocked by `reindex` / `where`") @pytest.mark.parametrize( "unit,error", ( @@ -902,12 +927,12 @@ def test_merge_dataarray(variant, unit, error, dtype): func([strip_units(arr1), convert_and_strip(arr2), convert_and_strip(arr3)]), units, ) - result = func([arr1, arr2, arr3]) + actual = func([arr1, arr2, arr3]) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) -@pytest.mark.xfail(reason="blocked by `where`") +@pytest.mark.xfail(reason="blocked by `reindex` / `where`") @pytest.mark.parametrize( "unit,error", ( @@ -985,9 +1010,9 @@ def test_merge_dataset(variant, unit, error, dtype): expected = attach_units( func([strip_units(ds1), convert_and_strip(ds2), convert_and_strip(ds3)]), units ) - result = func([ds1, ds2, ds3]) + actual = func([ds1, ds2, ds3]) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) @@ -997,9 +1022,9 @@ def test_replication_dataarray(func, dtype): numpy_func = getattr(np, func.__name__) expected = xr.DataArray(data=numpy_func(array), dims="x") - result = func(data_array) + actual = func(data_array) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) @@ -1019,9 +1044,9 @@ def test_replication_dataset(func, dtype): expected = ds.copy( data={name: numpy_func(array.data) for name, array in ds.data_vars.items()} ) - result = func(ds) + actual = func(ds) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.xfail( @@ -1051,11 +1076,16 @@ def test_replication_full_like_dataarray(unit, error, dtype): if error is not None: with pytest.raises(error): xr.full_like(data_array, fill_value=fill_value) - else: - result = xr.full_like(data_array, fill_value=fill_value) - expected = np.full_like(array, fill_value=fill_value) - assert_equal_with_units(expected, result) + return + + units = {**extract_units(data_array), **{None: unit if unit != 1 else None}} + expected = attach_units( + xr.full_like(strip_units(data_array), fill_value=strip_units(fill_value)), units + ) + actual = xr.full_like(data_array, fill_value=fill_value) + + assert_equal_with_units(expected, actual) @pytest.mark.xfail( @@ -1096,18 +1126,18 @@ def test_replication_full_like_dataset(unit, error, dtype): return - expected = ds.copy( - data={ - name: np.full_like(array, fill_value=fill_value) - for name, array in ds.data_vars.items() - } + units = { + **extract_units(ds), + **{name: unit if unit != 1 else None for name in ds.data_vars}, + } + expected = attach_units( + xr.full_like(strip_units(ds), fill_value=strip_units(fill_value)), units ) - result = xr.full_like(ds, fill_value=fill_value) + actual = xr.full_like(ds, fill_value=fill_value) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) -@pytest.mark.xfail(reason="`where` strips units") @pytest.mark.parametrize( "unit,error", ( @@ -1127,30 +1157,29 @@ def test_where_dataarray(fill_value, 
unit, error, dtype): x = xr.DataArray(data=array, dims="x") cond = x < 5 * unit_registry.m - # FIXME: this should work without wrapping in array() - fill_value = np.array(fill_value) * unit + fill_value = fill_value * unit - if error is not None: + if error is not None and not ( + np.isnan(fill_value) and not isinstance(fill_value, Quantity) + ): with pytest.raises(error): xr.where(cond, x, fill_value) return - fill_value_ = ( - fill_value.to(unit_registry.m) - if isinstance(fill_value, unit_registry.Quantity) - and fill_value.check(unit_registry.m) - else fill_value - ) expected = attach_units( - xr.where(cond, strip_units(x), strip_units(fill_value_)), extract_units(x) + xr.where( + cond, + strip_units(x), + strip_units(convert_units(fill_value, {None: unit_registry.m})), + ), + extract_units(x), ) - result = xr.where(cond, x, fill_value) + actual = xr.where(cond, x, fill_value) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) -@pytest.mark.xfail(reason="`where` strips units") @pytest.mark.parametrize( "unit,error", ( @@ -1171,31 +1200,30 @@ def test_where_dataset(fill_value, unit, error, dtype): x = np.arange(10) * unit_registry.s ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("x", array2)}, coords={"x": x}) - cond = ds.x < 5 * unit_registry.s - # FIXME: this should work without wrapping in array() - fill_value = np.array(fill_value) * unit + cond = x < 5 * unit_registry.s + fill_value = fill_value * unit - if error is not None: + if error is not None and not ( + np.isnan(fill_value) and not isinstance(fill_value, Quantity) + ): with pytest.raises(error): xr.where(cond, ds, fill_value) return - fill_value_ = ( - fill_value.to(unit_registry.m) - if isinstance(fill_value, unit_registry.Quantity) - and fill_value.check(unit_registry.m) - else fill_value - ) expected = attach_units( - xr.where(cond, strip_units(ds), strip_units(fill_value_)), extract_units(ds) + xr.where( + cond, + strip_units(ds), + strip_units(convert_units(fill_value, {None: unit_registry.m})), + ), + extract_units(ds), ) - result = xr.where(cond, ds, fill_value) + actual = xr.where(cond, ds, fill_value) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) -@pytest.mark.xfail(reason="pint does not implement `np.einsum`") def test_dot_dataarray(dtype): array1 = ( np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) @@ -1206,13 +1234,15 @@ def test_dot_dataarray(dtype): np.linspace(10, 20, 10 * 20).reshape(10, 20).astype(dtype) * unit_registry.s ) - arr1 = xr.DataArray(data=array1, dims=("x", "y")) - arr2 = xr.DataArray(data=array2, dims=("y", "z")) + data_array = xr.DataArray(data=array1, dims=("x", "y")) + other = xr.DataArray(data=array2, dims=("y", "z")) - expected = array1.dot(array2) - result = xr.dot(arr1, arr2) + expected = attach_units( + xr.dot(strip_units(data_array), strip_units(other)), {None: unit_registry.m} + ) + actual = xr.dot(data_array, other) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) class TestDataArray: @@ -1295,37 +1325,20 @@ def test_repr(self, func, variant, dtype): function("any"), marks=pytest.mark.xfail(reason="not implemented by pint yet"), ), - pytest.param( - function("argmax"), - marks=pytest.mark.xfail( - reason="comparison of quantity with ndarrays in nanops not implemented" - ), - ), - pytest.param( - function("argmin"), - marks=pytest.mark.xfail( - reason="comparison of quantity with ndarrays in nanops not implemented" - ), - ), + function("argmax"), + 
function("argmin"), function("max"), function("mean"), pytest.param( function("median"), - marks=pytest.mark.xfail( - reason="np.median on DataArray strips the units" - ), + marks=pytest.mark.xfail(reason="not implemented by xarray"), ), function("min"), pytest.param( function("prod"), marks=pytest.mark.xfail(reason="not implemented by pint yet"), ), - pytest.param( - function("sum"), - marks=pytest.mark.xfail( - reason="comparison of quantity with ndarrays in nanops not implemented" - ), - ), + function("sum"), function("std"), function("var"), function("cumsum"), @@ -1341,18 +1354,8 @@ def test_repr(self, func, variant, dtype): method("any"), marks=pytest.mark.xfail(reason="not implemented by pint yet"), ), - pytest.param( - method("argmax"), - marks=pytest.mark.xfail( - reason="comparison of quantities with ndarrays in nanops not implemented" - ), - ), - pytest.param( - method("argmin"), - marks=pytest.mark.xfail( - reason="comparison of quantities with ndarrays in nanops not implemented" - ), - ), + method("argmax"), + method("argmin"), method("max"), method("mean"), method("median"), @@ -1363,12 +1366,7 @@ def test_repr(self, func, variant, dtype): reason="comparison of quantity with ndarrays in nanops not implemented" ), ), - pytest.param( - method("sum"), - marks=pytest.mark.xfail( - reason="comparison of quantity with ndarrays in nanops not implemented" - ), - ), + method("sum"), method("std"), method("var"), method("cumsum"), @@ -1380,34 +1378,36 @@ def test_repr(self, func, variant, dtype): ids=repr, ) def test_aggregation(self, func, dtype): - array = np.arange(10).astype(dtype) * unit_registry.m - data_array = xr.DataArray(data=array) + array = np.arange(10).astype(dtype) * ( + unit_registry.m if func.name != "cumprod" else unit_registry.dimensionless + ) + data_array = xr.DataArray(data=array, dims="x") - expected = xr.DataArray(data=func(array)) - result = func(data_array) + # units differ based on the applied function, so we need to + # first compute the units + units = extract_units(func(array)) + expected = attach_units(func(strip_units(data_array)), units) + actual = func(data_array) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "func", ( pytest.param(operator.neg, id="negate"), pytest.param(abs, id="absolute"), - pytest.param( - np.round, - id="round", - marks=pytest.mark.xfail(reason="pint does not implement round"), - ), + pytest.param(np.round, id="round"), ), ) def test_unary_operations(self, func, dtype): array = np.arange(10).astype(dtype) * unit_registry.m data_array = xr.DataArray(data=array) - expected = xr.DataArray(data=func(array)) - result = func(data_array) + units = extract_units(func(array)) + expected = attach_units(func(strip_units(data_array)), units) + actual = func(data_array) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "func", @@ -1415,23 +1415,18 @@ def test_unary_operations(self, func, dtype): pytest.param(lambda x: 2 * x, id="multiply"), pytest.param(lambda x: x + x, id="add"), pytest.param(lambda x: x[0] + x, id="add scalar"), - pytest.param( - lambda x: x.T @ x, - id="matrix multiply", - marks=pytest.mark.xfail( - reason="pint does not support matrix multiplication yet" - ), - ), + pytest.param(lambda x: x.T @ x, id="matrix multiply"), ), ) def test_binary_operations(self, func, dtype): array = np.arange(10).astype(dtype) * unit_registry.m data_array = xr.DataArray(data=array) - expected = 
xr.DataArray(data=func(array)) - result = func(data_array) + units = extract_units(func(array)) + expected = attach_units(func(strip_units(data_array)), units) + actual = func(data_array) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "comparison", @@ -1448,8 +1443,9 @@ def test_binary_operations(self, func, dtype): pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), - pytest.param(unit_registry.s, DimensionalityError, id="incorrect_unit"), - pytest.param(unit_registry.m, None, id="correct_unit"), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), ), ) def test_comparison_operations(self, comparison, unit, error, dtype): @@ -1469,48 +1465,85 @@ def test_comparison_operations(self, comparison, unit, error, dtype): with pytest.raises(error): comparison(data_array, to_compare_with) - else: - result = comparison(data_array, to_compare_with) - # pint compares incompatible arrays to False, so we need to extend - # the multiplication works for both scalar and array results - expected = xr.DataArray( - data=comparison(array, to_compare_with) - * np.ones_like(array, dtype=bool) - ) - assert_equal_with_units(expected, result) + return + + actual = comparison(data_array, to_compare_with) + + expected_units = {None: unit_registry.m if array.check(unit) else None} + expected = array.check(unit) & comparison( + strip_units(data_array), + strip_units(convert_units(to_compare_with, expected_units)), + ) + + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "units,error", ( pytest.param(unit_registry.dimensionless, None, id="dimensionless"), - pytest.param(unit_registry.m, DimensionalityError, id="incorrect unit"), - pytest.param(unit_registry.degree, None, id="correct unit"), + pytest.param(unit_registry.m, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.degree, None, id="compatible_unit"), ), ) def test_univariate_ufunc(self, units, error, dtype): array = np.arange(10).astype(dtype) * units data_array = xr.DataArray(data=array) + func = function("sin") + if error is not None: with pytest.raises(error): np.sin(data_array) - else: - expected = xr.DataArray(data=np.sin(array)) - result = np.sin(data_array) - assert_equal_with_units(expected, result) + return + + expected = attach_units( + func(strip_units(convert_units(data_array, {None: unit_registry.radians}))), + {None: unit_registry.dimensionless}, + ) + actual = func(data_array) + + assert_equal_with_units(expected, actual) - @pytest.mark.xfail(reason="pint's implementation of `np.maximum` strips units") - def test_bivariate_ufunc(self, dtype): - unit = unit_registry.m - array = np.arange(10).astype(dtype) * unit + @pytest.mark.xfail(reason="xarray's `np.maximum` strips units") + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="without_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) + def test_bivariate_ufunc(self, unit, error, dtype): + original_unit = unit_registry.m + array = np.arange(10).astype(dtype) * original_unit data_array = xr.DataArray(data=array) - 
expected = xr.DataArray(np.maximum(array, 0 * unit)) + if error is not None: + with pytest.raises(error): + np.maximum(data_array, 0 * unit) + + return + + expected_units = {None: original_unit} + expected = attach_units( + np.maximum( + strip_units(data_array), + strip_units(convert_units(0 * unit, expected_units)), + ), + expected_units, + ) + + actual = np.maximum(data_array, 0 * unit) + assert_equal_with_units(expected, actual) - assert_equal_with_units(expected, np.maximum(data_array, 0 * unit)) - assert_equal_with_units(expected, np.maximum(0 * unit, data_array)) + actual = np.maximum(0 * unit, data_array) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize("property", ("T", "imag", "real")) def test_numpy_properties(self, property, dtype): @@ -1518,41 +1551,43 @@ def test_numpy_properties(self, property, dtype): np.arange(5 * 10).astype(dtype) + 1j * np.linspace(-1, 0, 5 * 10).astype(dtype) ).reshape(5, 10) * unit_registry.s + data_array = xr.DataArray(data=array, dims=("x", "y")) - expected = xr.DataArray( - data=getattr(array, property), - dims=("x", "y")[:: 1 if property != "T" else -1], + expected = attach_units( + getattr(strip_units(data_array), property), extract_units(data_array) ) - result = getattr(data_array, property) + actual = getattr(data_array, property) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "func", - ( - method("conj"), - method("argsort"), - method("conjugate"), - method("round"), - pytest.param( - method("rank", dim="x"), - marks=pytest.mark.xfail(reason="pint does not implement rank yet"), - ), - ), + (method("conj"), method("argsort"), method("conjugate"), method("round")), ids=repr, ) def test_numpy_methods(self, func, dtype): array = np.arange(10).astype(dtype) * unit_registry.m data_array = xr.DataArray(data=array, dims="x") - expected = xr.DataArray(func(array), dims="x") - result = func(data_array) + units = extract_units(func(array)) + expected = attach_units(strip_units(data_array), units) + actual = func(data_array) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( - "func", (method("clip", min=3, max=8), method("searchsorted", v=5)), ids=repr + "func", + ( + method("clip", min=3, max=8), + pytest.param( + method("searchsorted", v=5), + marks=pytest.mark.xfail( + reason="searchsorted somehow requires a undocumented `keys` argument" + ), + ), + ), + ids=repr, ) @pytest.mark.parametrize( "unit,error", @@ -1575,20 +1610,24 @@ def test_numpy_methods_with_args(self, func, unit, error, dtype): key: (value * unit if isinstance(value, scalar_types) else value) for key, value in func.kwargs.items() } - if error is not None: with pytest.raises(error): func(data_array, **kwargs) - else: - expected = func(array, **kwargs) - if func.name not in ["searchsorted"]: - expected = xr.DataArray(data=expected) - result = func(data_array, **kwargs) - if func.name in ["searchsorted"]: - assert np.allclose(expected, result) - else: - assert_equal_with_units(expected, result) + return + + units = extract_units(data_array) + expected_units = extract_units(func(array, **kwargs)) + stripped_kwargs = { + key: strip_units(convert_units(value, units)) + for key, value in kwargs.items() + } + expected = attach_units( + func(strip_units(data_array), **stripped_kwargs), expected_units + ) + actual = func(data_array, **kwargs) + + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "func", (method("isnull"), 
method("notnull"), method("count")), ids=repr @@ -1611,9 +1650,9 @@ def test_missing_value_detection(self, func, dtype): data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y")) expected = func(strip_units(data_array)) - result = func(data_array) + actual = func(data_array) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.xfail(reason="ffill and bfill lose units in data") @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr) @@ -1623,48 +1662,67 @@ def test_missing_value_filling(self, func, dtype): * unit_registry.degK ) x = np.arange(len(array)) - data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"]) - - result_without_units = func(strip_units(data_array), dim="x") - result = xr.DataArray( - data=result_without_units.data * unit_registry.degK, - coords={"x": x}, - dims=["x"], - ) + data_array = xr.DataArray(data=array, coords={"x": x}, dims="x") expected = attach_units( - func(strip_units(data_array), dim="x"), {"data": unit_registry.degK} + func(strip_units(data_array), dim="x"), extract_units(data_array) ) - result = func(data_array, dim="x") + actual = func(data_array, dim="x") - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) - @pytest.mark.xfail(reason="fillna drops the unit") @pytest.mark.parametrize( - "fill_value", + "unit,error", ( + pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( - -1, - id="python scalar", - marks=pytest.mark.xfail( - reason="python scalar cannot be converted using astype()" - ), + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param( + unit_registry.cm, + None, + id="compatible_unit", + marks=pytest.mark.xfail(reason="fillna converts to value's unit"), ), - pytest.param(np.array(-1), id="numpy scalar"), - pytest.param(np.array([-1]), id="numpy array"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) + @pytest.mark.parametrize( + "fill_value", + ( + pytest.param(-1, id="python_scalar"), + pytest.param(np.array(-1), id="numpy_scalar"), + pytest.param(np.array([-1]), id="numpy_array"), ), ) - def test_fillna(self, fill_value, dtype): - unit = unit_registry.m - array = np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) * unit + def test_fillna(self, fill_value, unit, error, dtype): + original_unit = unit_registry.m + array = ( + np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) + * original_unit + ) data_array = xr.DataArray(data=array) + func = method("fillna") + + value = fill_value * unit + if error is not None: + with pytest.raises(error): + func(data_array, value=value) + + return + + units = extract_units(data_array) expected = attach_units( - strip_units(data_array).fillna(value=fill_value), {"data": unit} + func( + strip_units(data_array), value=strip_units(convert_units(value, units)) + ), + units, ) - result = data_array.fillna(value=fill_value * unit) + actual = func(data_array, value=value) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) def test_dropna(self, dtype): array = ( @@ -1674,22 +1732,26 @@ def test_dropna(self, dtype): x = np.arange(len(array)) data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"]) - expected = attach_units( - strip_units(data_array).dropna(dim="x"), {"data": unit_registry.m} - ) - result = data_array.dropna(dim="x") + units = 
extract_units(data_array) + expected = attach_units(strip_units(data_array).dropna(dim="x"), units) + actual = data_array.dropna(dim="x") - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) - @pytest.mark.xfail(reason="pint does not implement `numpy.isin`") @pytest.mark.parametrize( "unit", ( - pytest.param(1, id="no_unit"), + pytest.param( + 1, + id="no_unit", + marks=pytest.mark.xfail( + reason="pint's isin implementation does not work well with mixed args" + ), + ), pytest.param(unit_registry.dimensionless, id="dimensionless"), pytest.param(unit_registry.s, id="incompatible_unit"), pytest.param(unit_registry.cm, id="compatible_unit"), - pytest.param(unit_registry.m, id="same_unit"), + pytest.param(unit_registry.m, id="identical_unit"), ), ) def test_isin(self, unit, dtype): @@ -1702,33 +1764,26 @@ def test_isin(self, unit, dtype): raw_values = np.array([1.4, np.nan, 2.3]).astype(dtype) values = raw_values * unit - result_without_units = strip_units(data_array).isin(raw_values) - if unit != unit_registry.m: - result_without_units[:] = False - result_with_units = data_array.isin(values) + units = {None: unit_registry.m if array.check(unit) else None} + expected = strip_units(data_array).isin( + strip_units(convert_units(values, units)) + ) & array.check(unit) + actual = data_array.isin(values) - assert_equal_with_units(result_without_units, result_with_units) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "variant", ( pytest.param( "masking", - marks=pytest.mark.xfail(reason="nan not compatible with quantity"), - ), - pytest.param( - "replacing_scalar", - marks=pytest.mark.xfail(reason="scalar not convertible using astype"), - ), - pytest.param( - "replacing_array", - marks=pytest.mark.xfail( - reason="replacing using an array drops the units" - ), + marks=pytest.mark.xfail(reason="array(nan) is not a quantity"), ), + "replacing_scalar", + "replacing_array", pytest.param( "dropping", - marks=pytest.mark.xfail(reason="nan not compatible with quantity"), + marks=pytest.mark.xfail(reason="array(nan) is not a quantity"), ), ), ) @@ -1741,13 +1796,10 @@ def test_isin(self, unit, dtype): ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), - pytest.param(unit_registry.m, None, id="same_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), ), ) def test_where(self, variant, unit, error, dtype): - def _strip_units(mapping): - return {key: array_strip_units(value) for key, value in mapping.items()} - original_unit = unit_registry.m array = np.linspace(0, 1, 10).astype(dtype) * original_unit @@ -1762,19 +1814,28 @@ def _strip_units(mapping): "dropping": {"cond": condition, "drop": True}, } kwargs = variant_kwargs.get(variant) - kwargs_without_units = _strip_units(kwargs) + kwargs_without_units = { + key: strip_units( + convert_units( + value, {None: original_unit if array.check(unit) else None} + ) + ) + for key, value in kwargs.items() + } if variant not in ("masking", "dropping") and error is not None: with pytest.raises(error): data_array.where(**kwargs) - else: - expected = attach_units( - strip_units(array).where(**kwargs_without_units), - {"data": original_unit}, - ) - result = data_array.where(**kwargs) - assert_equal_with_units(expected, result) + return + + expected = attach_units( + strip_units(data_array).where(**kwargs_without_units), + extract_units(data_array), + ) + actual = data_array.where(**kwargs) + + 
assert_equal_with_units(expected, actual) @pytest.mark.xfail(reason="interpolate strips units") def test_interpolate_na(self, dtype): @@ -1785,14 +1846,12 @@ def test_interpolate_na(self, dtype): x = np.arange(len(array)) data_array = xr.DataArray(data=array, coords={"x": x}, dims="x").astype(dtype) - expected = attach_units( - strip_units(data_array).interpolate_na(dim="x"), {"data": unit_registry.m} - ) - result = data_array.interpolate_na(dim="x") + units = extract_units(data_array) + expected = attach_units(strip_units(data_array).interpolate_na(dim="x"), units) + actual = data_array.interpolate_na(dim="x") - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) - @pytest.mark.xfail(reason="uses DataArray.where, which currently fails") @pytest.mark.parametrize( "unit,error", ( @@ -1801,8 +1860,18 @@ def test_interpolate_na(self, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.cm, None, id="compatible_unit"), - pytest.param(unit_registry.m, None, id="identical_unit"), + pytest.param( + unit_registry.cm, + None, + id="compatible_unit", + marks=pytest.mark.xfail(reason="depends on reindex"), + ), + pytest.param( + unit_registry.m, + None, + id="identical_unit", + marks=pytest.mark.xfail(reason="depends on reindex"), + ), ), ) def test_combine_first(self, unit, error, dtype): @@ -1819,14 +1888,19 @@ def test_combine_first(self, unit, error, dtype): if error is not None: with pytest.raises(error): data_array.combine_first(other) - else: - expected = attach_units( - strip_units(data_array).combine_first(strip_units(other)), - {"data": unit_registry.m}, - ) - result = data_array.combine_first(other) - assert_equal_with_units(expected, result) + return + + units = extract_units(data_array) + expected = attach_units( + strip_units(data_array).combine_first( + strip_units(convert_units(other, units)) + ), + units, + ) + actual = data_array.combine_first(other) + + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "unit", @@ -1834,11 +1908,7 @@ def test_combine_first(self, unit, error, dtype): pytest.param(1, id="no_unit"), pytest.param(unit_registry.dimensionless, id="dimensionless"), pytest.param(unit_registry.s, id="incompatible_unit"), - pytest.param( - unit_registry.cm, - id="compatible_unit", - marks=pytest.mark.xfail(reason="identical does not check units yet"), - ), + pytest.param(unit_registry.cm, id="compatible_unit"), pytest.param(unit_registry.m, id="identical_unit"), ), ) @@ -1854,53 +1924,51 @@ def test_combine_first(self, unit, error, dtype): ) @pytest.mark.parametrize("func", (method("equals"), method("identical")), ids=repr) def test_comparisons(self, func, variation, unit, dtype): + def is_compatible(a, b): + a = a if a is not None else 1 + b = b if b is not None else 1 + quantity = np.arange(5) * a + + return a == b or quantity.check(b) + data = np.linspace(0, 5, 10).astype(dtype) coord = np.arange(len(data)).astype(dtype) base_unit = unit_registry.m - quantity = data * base_unit - x = coord * base_unit - y = coord * base_unit - - units = { - "data": (unit, base_unit, base_unit), - "dims": (base_unit, unit, base_unit), - "coords": (base_unit, base_unit, unit), + array = data * (base_unit if variation == "data" else 1) + x = coord * (base_unit if variation == "dims" else 1) + y = coord * (base_unit if variation == "coords" else 1) + + variations = { + "data": (unit, 1, 1), + "dims": (1, unit, 
1), + "coords": (1, 1, unit), } - data_unit, dim_unit, coord_unit = units.get(variation) + data_unit, dim_unit, coord_unit = variations.get(variation) - data_array = xr.DataArray( - data=quantity, coords={"x": x, "y": ("x", y)}, dims="x" - ) + data_array = xr.DataArray(data=array, coords={"x": x, "y": ("x", y)}, dims="x") other = attach_units( - strip_units(data_array), - { - None: (data_unit, base_unit if quantity.check(data_unit) else None), - "x": (dim_unit, base_unit if x.check(dim_unit) else None), - "y": (coord_unit, base_unit if y.check(coord_unit) else None), - }, + strip_units(data_array), {None: data_unit, "x": dim_unit, "y": coord_unit} ) - # TODO: test dim coord once indexes leave units intact - # also, express this in terms of calls on the raw data array - # and then check the units - equal_arrays = ( - np.all(quantity == other.data) - and (np.all(x == other.x.data) or True) # dims can't be checked yet - and np.all(y == other.y.data) - ) - equal_units = ( - data_unit == unit_registry.m - and coord_unit == unit_registry.m - and dim_unit == unit_registry.m + units = extract_units(data_array) + other_units = extract_units(other) + + equal_arrays = all( + is_compatible(units[name], other_units[name]) for name in units.keys() + ) and ( + strip_units(data_array).equals( + strip_units(convert_units(other, extract_units(data_array))) + ) ) + equal_units = units == other_units expected = equal_arrays and (func.name != "identical" or equal_units) - result = func(data_array, other) - assert expected == result + actual = func(data_array, other) + + assert expected == actual - @pytest.mark.xfail(reason="blocked by `where`") @pytest.mark.parametrize( "unit", ( @@ -1926,9 +1994,9 @@ def test_broadcast_like(self, unit, dtype): expected = attach_units( strip_units(arr1).broadcast_like(strip_units(arr2)), extract_units(arr1) ) - result = arr1.broadcast_like(arr2) + actual = arr1.broadcast_like(arr2) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "unit", @@ -1942,19 +2010,21 @@ def test_broadcast_like(self, unit, dtype): ) def test_broadcast_equals(self, unit, dtype): left_array = np.ones(shape=(2, 2), dtype=dtype) * unit_registry.m - right_array = array_attach_units( - np.ones(shape=(2,), dtype=dtype), - unit, - convert_from=unit_registry.m if left_array.check(unit) else None, - ) + right_array = np.ones(shape=(2,), dtype=dtype) * unit left = xr.DataArray(data=left_array, dims=("x", "y")) right = xr.DataArray(data=right_array, dims="x") - expected = np.all(left_array == right_array[:, None]) - result = left.broadcast_equals(right) + units = { + **extract_units(left), + **({} if left_array.check(unit) else {None: None}), + } + expected = strip_units(left).broadcast_equals( + strip_units(convert_units(right, units)) + ) & left_array.check(unit) + actual = left.broadcast_equals(right) - assert expected == result + assert expected == actual @pytest.mark.parametrize( "func", @@ -1969,16 +2039,11 @@ def test_broadcast_equals(self, unit, dtype): dim={"z": np.linspace(10, 20, 12) * unit_registry.s}, axis=1, ), - method("drop_sel", labels="x"), + method("drop_vars", "x"), method("reset_coords", names="x2"), method("copy"), - pytest.param( - method("astype", np.float32), - marks=pytest.mark.xfail(reason="units get stripped"), - ), - pytest.param( - method("item", 1), marks=pytest.mark.xfail(reason="units get stripped") - ), + method("astype", np.float32), + method("item", 1), ), ids=repr, ) @@ -2001,67 +2066,38 @@ def 
test_content_manipulation(self, func, dtype): stripped_kwargs = { key: array_strip_units(value) for key, value in func.kwargs.items() } - expected = attach_units( - func(strip_units(data_array), **stripped_kwargs), - { - "data": quantity.units, - "x": x.units, - "x_mm": x2.units, - "x2": x2.units, - "y": y.units, - }, - ) - result = func(data_array) + units = {**{"x_mm": x2.units, "x2": x2.units}, **extract_units(data_array)} - assert_equal_with_units(expected, result) + expected = attach_units(func(strip_units(data_array), **stripped_kwargs), units) + actual = func(data_array) + + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( - "func", - ( - pytest.param( - method("drop_sel", labels=dict(x=np.array([1, 5]))), - marks=pytest.mark.xfail( - reason="selecting using incompatible units does not raise" - ), - ), - pytest.param(method("copy", data=np.arange(20))), - ), - ids=repr, + "func", (pytest.param(method("copy", data=np.arange(20))),), ids=repr ) @pytest.mark.parametrize( - "unit,error", + "unit", ( - pytest.param(1, DimensionalityError, id="no_unit"), - pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" - ), - pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.cm, KeyError, id="compatible_unit"), - pytest.param(unit_registry.m, None, id="identical_unit"), + pytest.param(1, id="no_unit"), + pytest.param(unit_registry.dimensionless, id="dimensionless"), + pytest.param(unit_registry.degK, id="with_unit"), ), ) - def test_content_manipulation_with_units(self, func, unit, error, dtype): + def test_content_manipulation_with_units(self, func, unit, dtype): quantity = np.linspace(0, 10, 20, dtype=dtype) * unit_registry.pascal x = np.arange(len(quantity)) * unit_registry.m - data_array = xr.DataArray(name="data", data=quantity, coords={"x": x}, dims="x") + data_array = xr.DataArray(data=quantity, coords={"x": x}, dims="x") - kwargs = { - key: (value * unit if isinstance(value, np.ndarray) else value) - for key, value in func.kwargs.items() - } - stripped_kwargs = func.kwargs + kwargs = {key: value * unit for key, value in func.kwargs.items()} expected = attach_units( - func(strip_units(data_array), **stripped_kwargs), - {"data": quantity.units if func.name == "drop_sel" else unit, "x": x.units}, + func(strip_units(data_array)), {None: unit, "x": x.units} ) - if error is not None and func.name == "drop_sel": - with pytest.raises(error): - func(data_array, **kwargs) - else: - result = func(data_array, **kwargs) - assert_equal_with_units(expected, result) + + actual = func(data_array, **kwargs) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "indices", @@ -2074,95 +2110,152 @@ def test_isel(self, indices, dtype): array = np.arange(10).astype(dtype) * unit_registry.s x = np.arange(len(array)) * unit_registry.m - data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"]) + data_array = xr.DataArray(data=array, coords={"x": x}, dims="x") expected = attach_units( - strip_units(data_array).isel(x=indices), - {"data": unit_registry.s, "x": unit_registry.m}, + strip_units(data_array).isel(x=indices), extract_units(data_array) ) - result = data_array.isel(x=indices) + actual = data_array.isel(x=indices) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) - @pytest.mark.xfail( - reason="xarray does not support duck arrays in dimension coordinates" - ) + @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( - 
"values", + "raw_values", ( - pytest.param(12, id="single value"), - pytest.param([10, 5, 13], id="list of multiple values"), - pytest.param(np.array([9, 3, 7, 12]), id="array of multiple values"), + pytest.param(10, id="single_value"), + pytest.param([10, 5, 13], id="list_of_values"), + pytest.param(np.array([9, 3, 7, 12]), id="array_of_values"), ), ) @pytest.mark.parametrize( - "units,error", + "unit,error", ( - pytest.param(1, KeyError, id="no units"), + pytest.param(1, KeyError, id="no_units"), pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), - pytest.param(unit_registry.degree, KeyError, id="incorrect unit"), - pytest.param(unit_registry.s, None, id="correct unit"), + pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"), + pytest.param(unit_registry.dm, KeyError, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_sel(self, values, units, error, dtype): + def test_sel(self, raw_values, unit, error, dtype): array = np.linspace(5, 10, 20).astype(dtype) * unit_registry.m - x = np.arange(len(array)) * unit_registry.s - data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"]) + x = np.arange(len(array)) * unit_registry.m + data_array = xr.DataArray(data=array, coords={"x": x}, dims="x") - values_with_units = values * units + values = raw_values * unit - if error is not None: + if error is not None and not ( + isinstance(raw_values, (int, float)) and x.check(unit) + ): with pytest.raises(error): - data_array.sel(x=values_with_units) - else: - result_array = array[values] - result_data_array = data_array.sel(x=values_with_units) - assert_equal_with_units(result_array, result_data_array) + data_array.sel(x=values) + + return - @pytest.mark.xfail( - reason="xarray does not support duck arrays in dimension coordinates" + expected = attach_units( + strip_units(data_array).sel( + x=strip_units(convert_units(values, {None: array.units})) + ), + extract_units(data_array), + ) + actual = data_array.sel(x=values) + assert_equal_with_units(expected, actual) + + @pytest.mark.xfail(reason="indexes don't support units") + @pytest.mark.parametrize( + "raw_values", + ( + pytest.param(10, id="single_value"), + pytest.param([10, 5, 13], id="list_of_values"), + pytest.param(np.array([9, 3, 7, 12]), id="array_of_values"), + ), + ) + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, KeyError, id="no_units"), + pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), + pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"), + pytest.param(unit_registry.dm, KeyError, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), ) + def test_loc(self, raw_values, unit, error, dtype): + array = np.linspace(5, 10, 20).astype(dtype) * unit_registry.m + x = np.arange(len(array)) * unit_registry.m + data_array = xr.DataArray(data=array, coords={"x": x}, dims="x") + + values = raw_values * unit + + if error is not None and not ( + isinstance(raw_values, (int, float)) and x.check(unit) + ): + with pytest.raises(error): + data_array.loc[{"x": values}] + + return + + expected = attach_units( + strip_units(data_array).loc[ + {"x": strip_units(convert_units(values, {None: array.units}))} + ], + extract_units(data_array), + ) + actual = data_array.loc[{"x": values}] + assert_equal_with_units(expected, actual) + + @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( - "values", + "raw_values", ( - pytest.param(12, id="single value"), 
- pytest.param([10, 5, 13], id="list of multiple values"), - pytest.param(np.array([9, 3, 7, 12]), id="array of multiple values"), + pytest.param(10, id="single_value"), + pytest.param([10, 5, 13], id="list_of_values"), + pytest.param(np.array([9, 3, 7, 12]), id="array_of_values"), ), ) @pytest.mark.parametrize( - "units,error", + "unit,error", ( - pytest.param(1, KeyError, id="no units"), + pytest.param(1, KeyError, id="no_units"), pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), - pytest.param(unit_registry.degree, KeyError, id="incorrect unit"), - pytest.param(unit_registry.s, None, id="correct unit"), + pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"), + pytest.param(unit_registry.dm, KeyError, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_loc(self, values, units, error, dtype): + def test_drop_sel(self, raw_values, unit, error, dtype): array = np.linspace(5, 10, 20).astype(dtype) * unit_registry.m - x = np.arange(len(array)) * unit_registry.s - data_array = xr.DataArray(data=array, coords={"x": x}, dims=["x"]) + x = np.arange(len(array)) * unit_registry.m + data_array = xr.DataArray(data=array, coords={"x": x}, dims="x") - values_with_units = values * units + values = raw_values * unit - if error is not None: + if error is not None and not ( + isinstance(raw_values, (int, float)) and x.check(unit) + ): with pytest.raises(error): - data_array.loc[values_with_units] - else: - result_array = array[values] - result_data_array = data_array.loc[values_with_units] - assert_equal_with_units(result_array, result_data_array) + data_array.drop_sel(x=values) + + return + + expected = attach_units( + strip_units(data_array).drop_sel( + x=strip_units(convert_units(values, {None: x.units})) + ), + extract_units(data_array), + ) + actual = data_array.drop_sel(x=values) + assert_equal_with_units(expected, actual) - @pytest.mark.xfail(reason="tries to coerce using asarray") @pytest.mark.parametrize( "shape", ( - pytest.param((10, 20), id="nothing squeezable"), - pytest.param((10, 20, 1), id="last dimension squeezable"), - pytest.param((10, 1, 20), id="middle dimension squeezable"), - pytest.param((1, 10, 20), id="first dimension squeezable"), - pytest.param((1, 10, 1, 20), id="first and last dimension squeezable"), + pytest.param((10, 20), id="nothing_squeezable"), + pytest.param((10, 20, 1), id="last_dimension_squeezable"), + pytest.param((10, 1, 20), id="middle_dimension_squeezable"), + pytest.param((1, 10, 20), id="first_dimension_squeezable"), + pytest.param((1, 10, 1, 20), id="first_and_last_dimension_squeezable"), ), ) def test_squeeze(self, shape, dtype): @@ -2177,38 +2270,27 @@ def test_squeeze(self, shape, dtype): data=array, coords=coords, dims=tuple(names[: len(shape)]) ) - result_array = array.squeeze() - result_data_array = data_array.squeeze() - assert_equal_with_units(result_array, result_data_array) + expected = attach_units( + strip_units(data_array).squeeze(), extract_units(data_array) + ) + actual = data_array.squeeze() + assert_equal_with_units(expected, actual) # try squeezing the dimensions separately names = tuple(dim for dim, coord in coords.items() if len(coord) == 1) for index, name in enumerate(names): - assert_equal_with_units( - np.squeeze(array, axis=index), data_array.squeeze(dim=name) + expected = attach_units( + strip_units(data_array).squeeze(dim=name), extract_units(data_array) ) + actual = data_array.squeeze(dim=name) + assert_equal_with_units(expected, actual) - 
@pytest.mark.xfail( - reason="indexes strip units and head / tail / thin only support integers" - ) - @pytest.mark.parametrize( - "unit,error", - ( - pytest.param(1, DimensionalityError, id="no_unit"), - pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" - ), - pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.cm, None, id="compatible_unit"), - pytest.param(unit_registry.m, None, id="identical_unit"), - ), - ) @pytest.mark.parametrize( "func", (method("head", x=7, y=3), method("tail", x=7, y=3), method("thin", x=7, y=3)), ids=repr, ) - def test_head_tail_thin(self, func, unit, error, dtype): + def test_head_tail_thin(self, func, dtype): array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK coords = { @@ -2216,27 +2298,24 @@ def test_head_tail_thin(self, func, unit, error, dtype): "y": np.arange(5) * unit_registry.m, } - arr = xr.DataArray(data=array, coords=coords, dims=("x", "y")) + data_array = xr.DataArray(data=array, coords=coords, dims=("x", "y")) - kwargs = {name: value * unit for name, value in func.kwargs.items()} - - if error is not None: - with pytest.raises(error): - func(arr, **kwargs) - - return - - expected = attach_units(func(strip_units(arr)), extract_units(arr)) - result = func(arr, **kwargs) + expected = attach_units( + func(strip_units(data_array)), extract_units(data_array) + ) + actual = func(data_array) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) + @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( "unit,error", ( - pytest.param(1, None, id="no_unit"), - pytest.param(unit_registry.dimensionless, None, id="dimensionless"), - pytest.param(unit_registry.s, None, id="incompatible_unit"), + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), @@ -2254,24 +2333,29 @@ def test_interp(self, unit, error): if error is not None: with pytest.raises(error): data_array.interp(x=new_coords) - else: - new_coords_ = ( - new_coords.magnitude if hasattr(new_coords, "magnitude") else new_coords - ) - result_array = strip_units(data_array).interp( - x=new_coords_ * unit_registry.degK - ) - result_data_array = data_array.interp(x=new_coords) - assert_equal_with_units(result_array, result_data_array) + return + + units = extract_units(data_array) + expected = attach_units( + strip_units(data_array).interp( + x=strip_units(convert_units(new_coords, {None: unit_registry.m})) + ), + units, + ) + actual = data_array.interp(x=new_coords) + + assert_equal_with_units(expected, actual) - @pytest.mark.xfail(reason="tries to coerce using asarray") + @pytest.mark.xfail(reason="indexes strip units") @pytest.mark.parametrize( "unit,error", ( - pytest.param(1, None, id="no_unit"), - pytest.param(unit_registry.dimensionless, None, id="dimensionless"), - pytest.param(unit_registry.s, None, id="incompatible_unit"), + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, 
id="identical_unit"), ), @@ -2284,43 +2368,46 @@ def test_interp_like(self, unit, error): } data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) - new_data_array = xr.DataArray( - data=np.empty((20, 10)), + other = xr.DataArray( + data=np.empty((20, 10)) * unit_registry.degK, coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit}, dims=("x", "y"), ) if error is not None: with pytest.raises(error): - data_array.interp_like(new_data_array) - else: - result_array = ( - xr.DataArray( - data=array.magnitude, - coords={name: value.magnitude for name, value in coords.items()}, - dims=("x", "y"), - ).interp_like(strip_units(new_data_array)) - * unit_registry.degK - ) - result_data_array = data_array.interp_like(new_data_array) + data_array.interp_like(other) - assert_equal_with_units(result_array, result_data_array) + return - @pytest.mark.xfail( - reason="pint does not implement np.result_type in __array_function__ yet" - ) + units = extract_units(data_array) + expected = attach_units( + strip_units(data_array).interp_like( + strip_units(convert_units(other, units)) + ), + units, + ) + actual = data_array.interp_like(other) + + assert_equal_with_units(expected, actual) + + @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( "unit,error", ( - pytest.param(1, None, id="no_unit"), - pytest.param(unit_registry.dimensionless, None, id="dimensionless"), - pytest.param(unit_registry.s, None, id="incompatible_unit"), + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_reindex(self, unit, error): - array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK + def test_reindex(self, unit, error, dtype): + array = ( + np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK + ) new_coords = (np.arange(10) + 0.5) * unit coords = { "x": np.arange(10) * unit_registry.m, @@ -2328,65 +2415,70 @@ def test_reindex(self, unit, error): } data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) + func = method("reindex") if error is not None: with pytest.raises(error): - data_array.interp(x=new_coords) - else: - result_array = strip_units(data_array).reindex( - x=( - new_coords.magnitude - if hasattr(new_coords, "magnitude") - else new_coords - ) - * unit_registry.degK - ) - result_data_array = data_array.reindex(x=new_coords) + func(data_array, x=new_coords) - assert_equal_with_units(result_array, result_data_array) + return - @pytest.mark.xfail( - reason="pint does not implement np.result_type in __array_function__ yet" - ) + expected = attach_units( + func( + strip_units(data_array), + x=strip_units(convert_units(new_coords, {None: unit_registry.m})), + ), + {None: unit_registry.degK}, + ) + actual = func(data_array, x=new_coords) + + assert_equal_with_units(expected, actual) + + @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( "unit,error", ( - pytest.param(1, None, id="no_unit"), - pytest.param(unit_registry.dimensionless, None, id="dimensionless"), - pytest.param(unit_registry.s, None, id="incompatible_unit"), + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, 
DimensionalityError, id="incompatible_unit"), pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_reindex_like(self, unit, error): - array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK + def test_reindex_like(self, unit, error, dtype): + array = ( + np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK + ) coords = { "x": (np.arange(10) + 0.3) * unit_registry.m, "y": (np.arange(5) + 0.3) * unit_registry.m, } data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) - new_data_array = xr.DataArray( - data=np.empty((20, 10)), + other = xr.DataArray( + data=np.empty((20, 10)) * unit_registry.degK, coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit}, dims=("x", "y"), ) if error is not None: with pytest.raises(error): - data_array.reindex_like(new_data_array) - else: - expected = attach_units( - strip_units(data_array).reindex_like(strip_units(new_data_array)), - { - "data": unit_registry.degK, - "x": unit_registry.m, - "y": unit_registry.m, - }, - ) - result = data_array.reindex_like(new_data_array) + data_array.reindex_like(other) - assert_equal_with_units(expected, result) + return + + units = extract_units(data_array) + expected = attach_units( + strip_units(data_array).reindex_like( + strip_units(convert_units(other, units)) + ), + units, + ) + actual = data_array.reindex_like(other) + + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "func", @@ -2406,11 +2498,11 @@ def test_stacking_stacked(self, func, dtype): stacked = data_array.stack(z=("x", "y")) expected = attach_units(func(strip_units(stacked)), {"data": unit_registry.m}) - result = func(stacked) + actual = func(stacked) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) - @pytest.mark.xfail(reason="indexes strip the label units") + @pytest.mark.xfail(reason="indexes don't support units") def test_to_unstacked_dataset(self, dtype): array = ( np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) @@ -2429,13 +2521,9 @@ def test_to_unstacked_dataset(self, dtype): func(strip_units(data_array)), {"y": y.units, **dict(zip(x.magnitude, [array.units] * len(y)))}, ).rename({elem.magnitude: elem for elem in x}) - result = func(data_array) - - print(data_array, expected, result, sep="\n") + actual = func(data_array) - assert_equal_with_units(expected, result) - - assert False + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "func", @@ -2446,10 +2534,7 @@ def test_to_unstacked_dataset(self, dtype): pytest.param( method("shift", x=2), marks=pytest.mark.xfail(reason="strips units") ), - pytest.param( - method("roll", x=2, roll_coords=False), - marks=pytest.mark.xfail(reason="strips units"), - ), + method("roll", x=2, roll_coords=False), method("sortby", "x2"), ), ids=repr, @@ -2471,12 +2556,10 @@ def test_stacking_reordering(self, func, dtype): dims=("x", "y", "z"), ) - expected = attach_units( - func(strip_units(data_array)), {"data": unit_registry.m} - ) - result = func(data_array) + expected = attach_units(func(strip_units(data_array)), {None: unit_registry.m}) + actual = func(data_array) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "func", @@ -2484,20 +2567,14 @@ def test_stacking_reordering(self, func, dtype): method("diff", dim="x"), method("differentiate", coord="x"), method("integrate", dim="x"), - pytest.param( - method("quantile", q=[0.25, 0.75]), - 
marks=pytest.mark.xfail( - reason="pint does not implement nanpercentile yet" - ), - ), - pytest.param( - method("reduce", func=np.sum, dim="x"), - marks=pytest.mark.xfail(reason="strips units"), - ), + method("quantile", q=[0.25, 0.75]), + method("reduce", func=np.sum, dim="x"), pytest.param( lambda x: x.dot(x), id="method_dot", - marks=pytest.mark.xfail(reason="pint does not implement einsum"), + marks=pytest.mark.xfail( + reason="pint does not implement the dot method" + ), ), ), ids=repr, @@ -2511,30 +2588,35 @@ def test_computation(self, func, dtype): y = np.arange(array.shape[1]) * unit_registry.s data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y")) - units = extract_units(data_array) + + # we want to make sure the output unit is correct + units = { + **extract_units(data_array), + **( + {} + if isinstance(func, (function, method)) + else extract_units(func(array.reshape(-1))) + ), + } expected = attach_units(func(strip_units(data_array)), units) - result = func(data_array) + actual = func(data_array) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "func", ( - pytest.param( - method("groupby", "y"), marks=pytest.mark.xfail(reason="strips units") - ), - pytest.param( - method("groupby_bins", "y", bins=4), - marks=pytest.mark.xfail(reason="strips units"), - ), + method("groupby", "x"), + method("groupby_bins", "y", bins=4), method("coarsen", y=2), pytest.param( - method("rolling", y=3), marks=pytest.mark.xfail(reason="strips units") + method("rolling", y=3), + marks=pytest.mark.xfail(reason="rolling strips units"), ), pytest.param( method("rolling_exp", y=3), - marks=pytest.mark.xfail(reason="strips units"), + marks=pytest.mark.xfail(reason="units not supported by numbagg"), ), ), ids=repr, @@ -2544,18 +2626,17 @@ def test_computation_objects(self, func, dtype): np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m ) - x = np.arange(array.shape[0]) * unit_registry.m + x = np.array([0, 0, 1, 2, 2]) * unit_registry.m y = np.arange(array.shape[1]) * 3 * unit_registry.s data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y")) units = extract_units(data_array) expected = attach_units(func(strip_units(data_array)).mean(), units) - result = func(data_array).mean() + actual = func(data_array).mean() - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) - @pytest.mark.xfail(reason="strips units") def test_resample(self, dtype): array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m @@ -2566,23 +2647,17 @@ def test_resample(self, dtype): func = method("resample", time="6m") expected = attach_units(func(strip_units(data_array)).mean(), units) - result = func(data_array).mean() + actual = func(data_array).mean() - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "func", ( - pytest.param( - method("assign_coords", {"z": (["x"], np.arange(5) * unit_registry.s)}), - marks=pytest.mark.xfail(reason="strips units"), - ), - pytest.param(method("first")), - pytest.param(method("last")), - pytest.param( - method("quantile", q=[0.25, 0.5, 0.75], dim="x"), - marks=pytest.mark.xfail(reason="strips units"), - ), + method("assign_coords", z=(["x"], np.arange(5) * unit_registry.s)), + method("first"), + method("last"), + method("quantile", q=np.array([0.25, 0.5, 0.75]), dim="x"), ), ids=repr, ) @@ -2595,12 +2670,22 @@ def test_grouped_operations(self, func, dtype): y = 
np.arange(array.shape[1]) * 3 * unit_registry.s data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y")) - units = extract_units(data_array) + units = {**extract_units(data_array), **{"z": unit_registry.s, "q": None}} - expected = attach_units(func(strip_units(data_array).groupby("y")), units) - result = func(data_array.groupby("y")) + stripped_kwargs = { + key: ( + strip_units(value) + if not isinstance(value, tuple) + else tuple(strip_units(elem) for elem in value) + ) + for key, value in func.kwargs.items() + } + expected = attach_units( + func(strip_units(data_array).groupby("y"), **stripped_kwargs), units + ) + actual = func(data_array.groupby("y")) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) class TestDataset: @@ -2620,10 +2705,7 @@ class TestDataset: "shared", ( "nothing", - pytest.param( - "dims", - marks=pytest.mark.xfail(reason="reindex does not work with pint yet"), - ), + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), pytest.param( "coords", marks=pytest.mark.xfail(reason="reindex does not work with pint yet"), @@ -2674,7 +2756,7 @@ def test_init(self, shared, unit, error, dtype): return - result = xr.Dataset(data_vars={"a": arr1, "b": arr2}) + actual = xr.Dataset(data_vars={"a": arr1, "b": arr2}) expected_units = { "a": a.units, @@ -2688,7 +2770,7 @@ def test_init(self, shared, unit, error, dtype): xr.Dataset(data_vars={"a": strip_units(arr1), "b": strip_units(arr2)}), expected_units, ) - assert_equal_with_units(result, expected) + assert_equal_with_units(actual, expected) @pytest.mark.parametrize( "func", (pytest.param(str, id="str"), pytest.param(repr, id="repr")) @@ -2749,12 +2831,7 @@ def test_repr(self, func, variant, dtype): reason="np.median does not work with dataset yet" ), ), - pytest.param( - function("sum"), - marks=pytest.mark.xfail( - reason="np.result_type not implemented by pint" - ), - ), + function("sum"), pytest.param( function("prod"), marks=pytest.mark.xfail(reason="not implemented by pint"), @@ -2764,9 +2841,7 @@ def test_repr(self, func, variant, dtype): function("cumsum"), pytest.param( function("cumprod"), - marks=pytest.mark.xfail( - reason="pint does not support cumprod on non-dimensionless yet" - ), + marks=pytest.mark.xfail(reason="fails within xarray"), ), pytest.param( method("all"), marks=pytest.mark.xfail(reason="not implemented by pint") @@ -2780,12 +2855,7 @@ def test_repr(self, func, variant, dtype): method("min"), method("mean"), method("median"), - pytest.param( - method("sum"), - marks=pytest.mark.xfail( - reason="np.result_type not implemented by pint" - ), - ), + method("sum"), pytest.param( method("prod"), marks=pytest.mark.xfail(reason="not implemented by pint"), @@ -2794,17 +2864,20 @@ def test_repr(self, func, variant, dtype): method("var"), method("cumsum"), pytest.param( - method("cumprod"), - marks=pytest.mark.xfail( - reason="pint does not support cumprod on non-dimensionless yet" - ), + method("cumprod"), marks=pytest.mark.xfail(reason="fails within xarray") ), ), ids=repr, ) def test_aggregation(self, func, dtype): - unit_a = unit_registry.Pa - unit_b = unit_registry.kg / unit_registry.m ** 3 + unit_a = ( + unit_registry.Pa if func.name != "cumprod" else unit_registry.dimensionless + ) + unit_b = ( + unit_registry.kg / unit_registry.m ** 3 + if func.name != "cumprod" + else unit_registry.dimensionless + ) a = xr.DataArray(data=np.linspace(0, 1, 10).astype(dtype) * unit_a, dims="x") b = xr.DataArray(data=np.linspace(-1, 0, 
10).astype(dtype) * unit_b, dims="x") x = xr.DataArray(data=np.arange(10).astype(dtype) * unit_registry.m, dims="x") @@ -2814,13 +2887,16 @@ def test_aggregation(self, func, dtype): ds = xr.Dataset(data_vars={"a": a, "b": b}, coords={"x": x, "y": y}) - result = func(ds) + actual = func(ds) expected = attach_units( func(strip_units(ds)), - {"a": array_extract_units(func(a)), "b": array_extract_units(func(b))}, + { + "a": extract_units(func(a)).get(None), + "b": extract_units(func(b)).get(None), + }, ) - assert_equal_with_units(result, expected) + assert_equal_with_units(actual, expected) @pytest.mark.parametrize("property", ("imag", "real")) def test_numpy_properties(self, property, dtype): @@ -2840,10 +2916,10 @@ def test_numpy_properties(self, property, dtype): ) units = extract_units(ds) - result = getattr(ds, property) + actual = getattr(ds, property) expected = attach_units(getattr(strip_units(ds), property), units) - assert_equal_with_units(result, expected) + assert_equal_with_units(actual, expected) @pytest.mark.parametrize( "func", @@ -2853,10 +2929,6 @@ def test_numpy_properties(self, property, dtype): method("argsort"), method("conjugate"), method("round"), - pytest.param( - method("rank", dim="x"), - marks=pytest.mark.xfail(reason="pint does not implement rank yet"), - ), ), ids=repr, ) @@ -2882,10 +2954,10 @@ def test_numpy_methods(self, func, dtype): "y": unit_registry.s, } - result = func(ds) + actual = func(ds) expected = attach_units(func(strip_units(ds)), units) - assert_equal_with_units(result, expected) + assert_equal_with_units(actual, expected) @pytest.mark.parametrize("func", (method("clip", min=3, max=8),), ids=repr) @pytest.mark.parametrize( @@ -2914,37 +2986,26 @@ def test_numpy_methods_with_args(self, func, unit, error, dtype): ) units = extract_units(ds) - def strip(value): - return ( - value.magnitude if isinstance(value, unit_registry.Quantity) else value - ) - - def convert(value, to): - if isinstance(value, unit_registry.Quantity) and value.check(to): - return value.to(to) - - return value - - scalar_types = (int, float) kwargs = { - key: (value * unit if isinstance(value, scalar_types) else value) + key: (value * unit if isinstance(value, (int, float)) else value) for key, value in func.kwargs.items() } - stripped_kwargs = { - key: strip(convert(value, data_unit)) for key, value in kwargs.items() - } - if error is not None: with pytest.raises(error): func(ds, **kwargs) return - result = func(ds, **kwargs) + stripped_kwargs = { + key: strip_units(convert_units(value, {None: data_unit})) + for key, value in kwargs.items() + } + + actual = func(ds, **kwargs) expected = attach_units(func(strip_units(ds), **stripped_kwargs), units) - assert_equal_with_units(result, expected) + assert_equal_with_units(actual, expected) @pytest.mark.parametrize( "func", (method("isnull"), method("notnull"), method("count")), ids=repr @@ -2987,9 +3048,9 @@ def test_missing_value_detection(self, func, dtype): ) expected = func(strip_units(ds)) - result = func(ds) + actual = func(ds) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.xfail(reason="ffill and bfill lose the unit") @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr) @@ -3017,40 +3078,35 @@ def test_missing_value_filling(self, func, dtype): func(strip_units(ds), dim="x"), {"a": unit_registry.degK, "b": unit_registry.Pa}, ) - result = func(ds, dim="x") + actual = func(ds, dim="x") - assert_equal_with_units(expected, result) + 
assert_equal_with_units(expected, actual) - @pytest.mark.xfail(reason="fillna drops the unit") @pytest.mark.parametrize( "unit,error", ( - pytest.param( - 1, - DimensionalityError, - id="no_unit", - marks=pytest.mark.xfail(reason="blocked by the failing `where`"), - ), + pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param( + unit_registry.cm, + None, + id="compatible_unit", + marks=pytest.mark.xfail( + reason="where converts the array, not the fill value" + ), + ), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @pytest.mark.parametrize( "fill_value", ( - pytest.param( - -1, - id="python scalar", - marks=pytest.mark.xfail( - reason="python scalar cannot be converted using astype()" - ), - ), - pytest.param(np.array(-1), id="numpy scalar"), - pytest.param(np.array([-1]), id="numpy array"), + pytest.param(-1, id="python_scalar"), + pytest.param(np.array(-1), id="numpy_scalar"), + pytest.param(np.array([-1]), id="numpy_array"), ), ) def test_fillna(self, fill_value, unit, error, dtype): @@ -3075,13 +3131,17 @@ def test_fillna(self, fill_value, unit, error, dtype): return - result = ds.fillna(value=fill_value * unit) + actual = ds.fillna(value=fill_value * unit) expected = attach_units( - strip_units(ds).fillna(value=fill_value), + strip_units(ds).fillna( + value=strip_units( + convert_units(fill_value * unit, {None: unit_registry.m}) + ) + ), {"a": unit_registry.m, "b": unit_registry.m}, ) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) def test_dropna(self, dtype): array1 = ( @@ -3105,11 +3165,10 @@ def test_dropna(self, dtype): strip_units(ds).dropna(dim="x"), {"a": unit_registry.degK, "b": unit_registry.Pa}, ) - result = ds.dropna(dim="x") + actual = ds.dropna(dim="x") - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) - @pytest.mark.xfail(reason="pint does not implement `numpy.isin`") @pytest.mark.parametrize( "unit", ( @@ -3154,36 +3213,12 @@ def test_isin(self, unit, dtype): ): expected.a[:] = False expected.b[:] = False - result = ds.isin(values) + actual = ds.isin(values) - assert_equal_with_units(result, expected) + assert_equal_with_units(actual, expected) @pytest.mark.parametrize( - "variant", - ( - pytest.param( - "masking", - marks=pytest.mark.xfail( - reason="np.result_type not implemented by quantity" - ), - ), - pytest.param( - "replacing_scalar", - marks=pytest.mark.xfail( - reason="python scalar not convertible using astype" - ), - ), - pytest.param( - "replacing_array", - marks=pytest.mark.xfail( - reason="replacing using an array drops the units" - ), - ), - pytest.param( - "dropping", - marks=pytest.mark.xfail(reason="nan not compatible with quantity"), - ), - ), + "variant", ("masking", "replacing_scalar", "replacing_array", "dropping") ) @pytest.mark.parametrize( "unit,error", @@ -3198,9 +3233,6 @@ def test_isin(self, unit, dtype): ), ) def test_where(self, variant, unit, error, dtype): - def _strip_units(mapping): - return {key: array_strip_units(value) for key, value in mapping.items()} - original_unit = unit_registry.m array1 = np.linspace(0, 1, 10).astype(dtype) * original_unit array2 = np.linspace(-1, 0, 10).astype(dtype) * original_unit @@ -3222,21 +3254,24 @@ def _strip_units(mapping): "dropping": {"cond": condition, "drop": True}, } 
kwargs = variant_kwargs.get(variant) - kwargs_without_units = _strip_units(kwargs) - if variant not in ("masking", "dropping") and error is not None: with pytest.raises(error): ds.where(**kwargs) return + kwargs_without_units = { + key: strip_units(convert_units(value, {None: original_unit})) + for key, value in kwargs.items() + } + expected = attach_units( strip_units(ds).where(**kwargs_without_units), {"a": original_unit, "b": original_unit}, ) - result = ds.where(**kwargs) + actual = ds.where(**kwargs) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.xfail(reason="interpolate strips units") def test_interpolate_na(self, dtype): @@ -3261,11 +3296,11 @@ def test_interpolate_na(self, dtype): strip_units(ds).interpolate_na(dim="x"), {"a": unit_registry.degK, "b": unit_registry.Pa}, ) - result = ds.interpolate_na(dim="x") + actual = ds.interpolate_na(dim="x") - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) - @pytest.mark.xfail(reason="uses Dataset.where, which currently fails") + @pytest.mark.xfail(reason="wrong argument order for `where`") @pytest.mark.parametrize( "unit,error", ( @@ -3281,11 +3316,11 @@ def test_interpolate_na(self, dtype): def test_combine_first(self, unit, error, dtype): array1 = ( np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) - * unit_registry.degK + * unit_registry.m ) array2 = ( np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) - * unit_registry.Pa + * unit_registry.m ) x = np.arange(len(array1)) ds = xr.Dataset( @@ -3312,12 +3347,16 @@ def test_combine_first(self, unit, error, dtype): return expected = attach_units( - strip_units(ds).combine_first(strip_units(other)), + strip_units(ds).combine_first( + strip_units( + convert_units(other, {"a": unit_registry.m, "b": unit_registry.m}) + ) + ), {"a": unit_registry.m, "b": unit_registry.m}, ) - result = ds.combine_first(other) + actual = ds.combine_first(other) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "unit", @@ -3325,11 +3364,7 @@ def test_combine_first(self, unit, error, dtype): pytest.param(1, id="no_unit"), pytest.param(unit_registry.dimensionless, id="dimensionless"), pytest.param(unit_registry.s, id="incompatible_unit"), - pytest.param( - unit_registry.cm, - id="compatible_unit", - marks=pytest.mark.xfail(reason="identical does not check units yet"), - ), + pytest.param(unit_registry.cm, id="compatible_unit"), pytest.param(unit_registry.m, id="identical_unit"), ), ) @@ -3345,6 +3380,13 @@ def test_combine_first(self, unit, error, dtype): ) @pytest.mark.parametrize("func", (method("equals"), method("identical")), ids=repr) def test_comparisons(self, func, variation, unit, dtype): + def is_compatible(a, b): + a = a if a is not None else 1 + b = b if b is not None else 1 + quantity = np.arange(5) * a + + return a == b or quantity.check(b) + array1 = np.linspace(0, 5, 10).astype(dtype) array2 = np.linspace(-5, 0, 10).astype(dtype) @@ -3356,11 +3398,7 @@ def test_comparisons(self, func, variation, unit, dtype): x = coord * original_unit y = coord * original_unit - units = { - "data": (unit, original_unit, original_unit), - "dims": (original_unit, unit, original_unit), - "coords": (original_unit, original_unit, unit), - } + units = {"data": (unit, 1, 1), "dims": (1, unit, 1), "coords": (1, 1, unit)} data_unit, dim_unit, coord_unit = units.get(variation) ds = xr.Dataset( @@ -3371,36 +3409,27 @@ def test_comparisons(self, func, 
variation, unit, dtype): coords={"x": x, "y": ("x", y)}, ) - other = attach_units( - strip_units(ds), - { - "a": (data_unit, original_unit if quantity1.check(data_unit) else None), - "b": (data_unit, original_unit if quantity2.check(data_unit) else None), - "x": (dim_unit, original_unit if x.check(dim_unit) else None), - "y": (coord_unit, original_unit if y.check(coord_unit) else None), - }, - ) + other_units = { + "a": data_unit if quantity1.check(data_unit) else None, + "b": data_unit if quantity2.check(data_unit) else None, + "x": dim_unit if x.check(dim_unit) else None, + "y": coord_unit if y.check(coord_unit) else None, + } + other = attach_units(strip_units(convert_units(ds, other_units)), other_units) - # TODO: test dim coord once indexes leave units intact - # also, express this in terms of calls on the raw data array - # and then check the units - equal_arrays = ( - np.all(ds.a.data == other.a.data) - and np.all(ds.b.data == other.b.data) - and (np.all(x == other.x.data) or True) # dims can't be checked yet - and np.all(y == other.y.data) - ) - equal_units = ( - data_unit == original_unit - and coord_unit == original_unit - and dim_unit == original_unit - ) - expected = equal_arrays and (func.name != "identical" or equal_units) - result = func(ds, other) + units = extract_units(ds) + other_units = extract_units(other) + + equal_ds = all( + is_compatible(units[name], other_units[name]) for name in units.keys() + ) and (strip_units(ds).equals(strip_units(convert_units(other, units)))) + equal_units = units == other_units + expected = equal_ds and (func.name != "identical" or equal_units) + + actual = func(ds, other) - assert expected == result + assert expected == actual - @pytest.mark.xfail(reason="blocked by `where`") @pytest.mark.parametrize( "unit", ( @@ -3430,9 +3459,9 @@ def test_broadcast_like(self, unit, dtype): expected = attach_units( strip_units(ds1).broadcast_like(strip_units(ds2)), extract_units(ds1) ) - result = ds1.broadcast_like(ds2) + actual = ds1.broadcast_like(ds2) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "unit", @@ -3446,38 +3475,34 @@ def test_broadcast_like(self, unit, dtype): ) def test_broadcast_equals(self, unit, dtype): left_array1 = np.ones(shape=(2, 3), dtype=dtype) * unit_registry.m - left_array2 = np.zeros(shape=(2, 6), dtype=dtype) * unit_registry.m + left_array2 = np.zeros(shape=(3, 6), dtype=dtype) * unit_registry.m - right_array1 = array_attach_units( - np.ones(shape=(2,), dtype=dtype), - unit, - convert_from=unit_registry.m if left_array1.check(unit) else None, - ) - right_array2 = array_attach_units( - np.ones(shape=(2,), dtype=dtype), - unit, - convert_from=unit_registry.m if left_array2.check(unit) else None, - ) + right_array1 = np.ones(shape=(2,)) * unit + right_array2 = np.ones(shape=(3,)) * unit left = xr.Dataset( data_vars={ "a": xr.DataArray(data=left_array1, dims=("x", "y")), - "b": xr.DataArray(data=left_array2, dims=("x", "z")), + "b": xr.DataArray(data=left_array2, dims=("y", "z")), } ) right = xr.Dataset( data_vars={ "a": xr.DataArray(data=right_array1, dims="x"), - "b": xr.DataArray(data=right_array2, dims="x"), + "b": xr.DataArray(data=right_array2, dims="y"), } ) - expected = np.all(left_array1 == right_array1[:, None]) and np.all( - left_array2 == right_array2[:, None] - ) - result = left.broadcast_equals(right) + units = { + **extract_units(left), + **({} if left_array1.check(unit) else {"a": None, "b": None}), + } + expected = 
strip_units(left).broadcast_equals( + strip_units(convert_units(right, units)) + ) & left_array1.check(unit) + actual = left.broadcast_equals(right) - assert expected == result + assert expected == actual @pytest.mark.parametrize( "func", @@ -3510,11 +3535,10 @@ def test_stacking_stacked(self, func, dtype): expected = attach_units( func(strip_units(stacked)), {"a": unit_registry.m, "b": unit_registry.m} ) - result = func(stacked) + actual = func(stacked) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) - @pytest.mark.xfail(reason="tries to subscript scalar quantities") def test_to_stacked_array(self, dtype): labels = np.arange(5).astype(dtype) * unit_registry.s arrays = {name: np.linspace(0, 1, 10) * unit_registry.m for name in labels} @@ -3528,13 +3552,13 @@ def test_to_stacked_array(self, dtype): func = method("to_stacked_array", "z", variable_dim="y", sample_dims=["x"]) - result = func(ds).rename(None) + actual = func(ds).rename(None) expected = attach_units( func(strip_units(ds)).rename(None), {None: unit_registry.m, "y": unit_registry.s}, ) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "func", @@ -3543,12 +3567,10 @@ def test_to_stacked_array(self, dtype): method("stack", a=("x", "y")), method("set_index", x="x2"), pytest.param( - method("shift", x=2), marks=pytest.mark.xfail(reason="sets all to nan") - ), - pytest.param( - method("roll", x=2, roll_coords=False), - marks=pytest.mark.xfail(reason="strips units"), + method("shift", x=2), + marks=pytest.mark.xfail(reason="tries to concatenate nan arrays"), ), + method("roll", x=2, roll_coords=False), method("sortby", "x2"), ), ids=repr, @@ -3581,9 +3603,9 @@ def test_stacking_reordering(self, func, dtype): expected = attach_units( func(strip_units(ds)), {"a": unit_registry.Pa, "b": unit_registry.degK} ) - result = func(ds) + actual = func(ds) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.xfail(reason="indexes strip units") @pytest.mark.parametrize( @@ -3610,35 +3632,33 @@ def test_isel(self, indices, dtype): strip_units(ds).isel(x=indices), {"a": unit_registry.s, "b": unit_registry.Pa, "x": unit_registry.m}, ) - result = ds.isel(x=indices) + actual = ds.isel(x=indices) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) - @pytest.mark.xfail( - reason="xarray does not support duck arrays in dimension coordinates" - ) + @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( - "values", + "raw_values", ( - pytest.param(12, id="single_value"), + pytest.param(10, id="single_value"), pytest.param([10, 5, 13], id="list_of_values"), pytest.param(np.array([9, 3, 7, 12]), id="array_of_values"), ), ) @pytest.mark.parametrize( - "units,error", + "unit,error", ( pytest.param(1, KeyError, id="no_units"), pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"), - pytest.param(unit_registry.ms, KeyError, id="compatible_unit"), - pytest.param(unit_registry.s, None, id="same_unit"), + pytest.param(unit_registry.dm, KeyError, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_sel(self, values, units, error, dtype): + def test_sel(self, raw_values, unit, error, dtype): array1 = np.linspace(5, 10, 20).astype(dtype) * unit_registry.degK array2 = np.linspace(0, 5, 20).astype(dtype) * unit_registry.Pa - x = 
np.arange(len(array1)) * unit_registry.s + x = np.arange(len(array1)) * unit_registry.m ds = xr.Dataset( data_vars={ @@ -3648,46 +3668,46 @@ def test_sel(self, values, units, error, dtype): coords={"x": x}, ) - values_with_units = values * units + values = raw_values * unit - if error is not None: + if error is not None and not ( + isinstance(raw_values, (int, float)) and x.check(unit) + ): with pytest.raises(error): - ds.sel(x=values_with_units) + ds.sel(x=values) return expected = attach_units( - strip_units(ds).sel(x=values), - {"a": unit_registry.degK, "b": unit_registry.Pa, "x": unit_registry.s}, + strip_units(ds).sel(x=strip_units(convert_units(values, {None: x.units}))), + {"a": array1.units, "b": array2.units, "x": x.units}, ) - result = ds.sel(x=values_with_units) - assert_equal_with_units(expected, result) + actual = ds.sel(x=values) + assert_equal_with_units(expected, actual) - @pytest.mark.xfail( - reason="xarray does not support duck arrays in dimension coordinates" - ) + @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( - "values", + "raw_values", ( - pytest.param(12, id="single value"), - pytest.param([10, 5, 13], id="list of multiple values"), - pytest.param(np.array([9, 3, 7, 12]), id="array of multiple values"), + pytest.param(10, id="single_value"), + pytest.param([10, 5, 13], id="list_of_values"), + pytest.param(np.array([9, 3, 7, 12]), id="array_of_values"), ), ) @pytest.mark.parametrize( - "units,error", + "unit,error", ( pytest.param(1, KeyError, id="no_units"), pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"), - pytest.param(unit_registry.ms, KeyError, id="compatible_unit"), - pytest.param(unit_registry.s, None, id="same_unit"), + pytest.param(unit_registry.dm, KeyError, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_loc(self, values, units, error, dtype): + def test_drop_sel(self, raw_values, unit, error, dtype): array1 = np.linspace(5, 10, 20).astype(dtype) * unit_registry.degK array2 = np.linspace(0, 5, 20).astype(dtype) * unit_registry.Pa - x = np.arange(len(array1)) * unit_registry.s + x = np.arange(len(array1)) * unit_registry.m ds = xr.Dataset( data_vars={ @@ -3697,36 +3717,76 @@ def test_loc(self, values, units, error, dtype): coords={"x": x}, ) - values_with_units = values * units + values = raw_values * unit - if error is not None: + if error is not None and not ( + isinstance(raw_values, (int, float)) and x.check(unit) + ): with pytest.raises(error): - ds.loc[{"x": values_with_units}] + ds.drop_sel(x=values) return expected = attach_units( - strip_units(ds).loc[{"x": values}], - {"a": unit_registry.degK, "b": unit_registry.Pa, "x": unit_registry.s}, + strip_units(ds).drop_sel( + x=strip_units(convert_units(values, {None: x.units})) + ), + extract_units(ds), ) - result = ds.loc[{"x": values_with_units}] - assert_equal_with_units(expected, result) + actual = ds.drop_sel(x=values) + assert_equal_with_units(expected, actual) - @pytest.mark.xfail( - reason="indexes strip units and head / tail / thin only support integers" + @pytest.mark.xfail(reason="indexes don't support units") + @pytest.mark.parametrize( + "raw_values", + ( + pytest.param(10, id="single_value"), + pytest.param([10, 5, 13], id="list_of_values"), + pytest.param(np.array([9, 3, 7, 12]), id="array_of_values"), + ), ) @pytest.mark.parametrize( "unit,error", ( - pytest.param(1, DimensionalityError, id="no_unit"), - 
pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" - ), - pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param(1, KeyError, id="no_units"), + pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), + pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"), + pytest.param(unit_registry.dm, KeyError, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) + def test_loc(self, raw_values, unit, error, dtype): + array1 = np.linspace(5, 10, 20).astype(dtype) * unit_registry.degK + array2 = np.linspace(0, 5, 20).astype(dtype) * unit_registry.Pa + x = np.arange(len(array1)) * unit_registry.m + + ds = xr.Dataset( + data_vars={ + "a": xr.DataArray(data=array1, dims="x"), + "b": xr.DataArray(data=array2, dims="x"), + }, + coords={"x": x}, + ) + + values = raw_values * unit + + if error is not None and not ( + isinstance(raw_values, (int, float)) and x.check(unit) + ): + with pytest.raises(error): + ds.loc[{"x": values}] + + return + + expected = attach_units( + strip_units(ds).loc[ + {"x": strip_units(convert_units(values, {None: x.units}))} + ], + {"a": array1.units, "b": array2.units, "x": x.units}, + ) + actual = ds.loc[{"x": values}] + assert_equal_with_units(expected, actual) + @pytest.mark.parametrize( "func", ( @@ -3736,7 +3796,7 @@ def test_loc(self, values, units, error, dtype): ), ids=repr, ) - def test_head_tail_thin(self, func, unit, error, dtype): + def test_head_tail_thin(self, func, dtype): array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_registry.Pa @@ -3754,18 +3814,10 @@ def test_head_tail_thin(self, func, unit, error, dtype): coords=coords, ) - kwargs = {name: value * unit for name, value in func.kwargs.items()} - - if error is not None: - with pytest.raises(error): - func(ds, **kwargs) - - return - expected = attach_units(func(strip_units(ds)), extract_units(ds)) - result = func(ds, **kwargs) + actual = func(ds) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "shape", @@ -3802,15 +3854,15 @@ def test_squeeze(self, shape, dtype): expected = attach_units(strip_units(ds).squeeze(), units) - result = ds.squeeze() - assert_equal_with_units(result, expected) + actual = ds.squeeze() + assert_equal_with_units(actual, expected) # try squeezing the dimensions separately names = tuple(dim for dim, coord in coords.items() if len(coord) == 1) for name in names: expected = attach_units(strip_units(ds).squeeze(dim=name), units) - result = ds.squeeze(dim=name) - assert_equal_with_units(result, expected) + actual = ds.squeeze(dim=name) + assert_equal_with_units(actual, expected) @pytest.mark.xfail(reason="ignores units") @pytest.mark.parametrize( @@ -3851,12 +3903,14 @@ def test_interp(self, unit, error): return + units = extract_units(ds) expected = attach_units( - strip_units(ds).interp(x=strip_units(new_coords)), extract_units(ds) + strip_units(ds).interp(x=strip_units(convert_units(new_coords, units))), + units, ) - result = ds.interp(x=new_coords) + actual = ds.interp(x=new_coords) - assert_equal_with_units(result, expected) + assert_equal_with_units(actual, expected) @pytest.mark.xfail(reason="ignores units") @pytest.mark.parametrize( @@ -3911,16 +3965,15 @@ def test_interp_like(self, unit, error, dtype): return + units = extract_units(ds) expected = 
attach_units( - strip_units(ds).interp_like(strip_units(other)), extract_units(ds) + strip_units(ds).interp_like(strip_units(convert_units(other, units))), units ) - result = ds.interp_like(other) + actual = ds.interp_like(other) - assert_equal_with_units(result, expected) + assert_equal_with_units(actual, expected) - @pytest.mark.xfail( - reason="pint does not implement np.result_type in __array_function__ yet" - ) + @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( "unit,error", ( @@ -3933,9 +3986,13 @@ def test_interp_like(self, unit, error, dtype): pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_reindex(self, unit, error): - array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK - array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_registry.Pa + def test_reindex(self, unit, error, dtype): + array1 = ( + np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK + ) + array2 = ( + np.linspace(1, 2, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa + ) coords = { "x": np.arange(10) * unit_registry.m, @@ -3955,20 +4012,21 @@ def test_reindex(self, unit, error): if error is not None: with pytest.raises(error): - ds.interp(x=new_coords) + ds.reindex(x=new_coords) return expected = attach_units( - strip_units(ds).reindex(x=strip_units(new_coords)), extract_units(ds) + strip_units(ds).reindex( + x=strip_units(convert_units(new_coords, {None: coords["x"].units})) + ), + extract_units(ds), ) - result = ds.reindex(x=new_coords) + actual = ds.reindex(x=new_coords) - assert_equal_with_units(result, expected) + assert_equal_with_units(actual, expected) - @pytest.mark.xfail( - reason="pint does not implement np.result_type in __array_function__ yet" - ) + @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( "unit,error", ( @@ -4021,12 +4079,14 @@ def test_reindex_like(self, unit, error, dtype): return + units = extract_units(ds) expected = attach_units( - strip_units(ds).reindex_like(strip_units(other)), extract_units(ds) + strip_units(ds).reindex_like(strip_units(convert_units(other, units))), + units, ) - result = ds.reindex_like(other) + actual = ds.reindex_like(other) - assert_equal_with_units(result, expected) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "func", @@ -4034,20 +4094,9 @@ def test_reindex_like(self, unit, error, dtype): method("diff", dim="x"), method("differentiate", coord="x"), method("integrate", coord="x"), - pytest.param( - method("quantile", q=[0.25, 0.75]), - marks=pytest.mark.xfail( - reason="pint does not implement nanpercentile yet" - ), - ), - pytest.param( - method("reduce", func=np.sum, dim="x"), - marks=pytest.mark.xfail(reason="strips units"), - ), - pytest.param( - method("map", np.fabs), - marks=pytest.mark.xfail(reason="fabs strips units"), - ), + method("quantile", q=[0.25, 0.75]), + method("reduce", func=np.sum, dim="x"), + method("map", np.fabs), ), ids=repr, ) @@ -4073,27 +4122,22 @@ def test_computation(self, func, dtype): units = extract_units(ds) expected = attach_units(func(strip_units(ds)), units) - result = func(ds) + actual = func(ds) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "func", ( - pytest.param( - method("groupby", "x"), marks=pytest.mark.xfail(reason="strips units") - ), - pytest.param( - method("groupby_bins", "x", bins=4), - marks=pytest.mark.xfail(reason="strips units"), - ), + method("groupby", "x"), + 
method("groupby_bins", "x", bins=4), method("coarsen", x=2), pytest.param( method("rolling", x=3), marks=pytest.mark.xfail(reason="strips units") ), pytest.param( method("rolling_exp", x=3), - marks=pytest.mark.xfail(reason="strips units"), + marks=pytest.mark.xfail(reason="uses numbagg which strips units"), ), ), ids=repr, @@ -4122,11 +4166,10 @@ def test_computation_objects(self, func, dtype): args = [] if func.name != "groupby" else ["y"] reduce_func = method("mean", *args) expected = attach_units(reduce_func(func(strip_units(ds))), units) - result = reduce_func(func(ds)) + actual = reduce_func(func(ds)) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) - @pytest.mark.xfail(reason="strips units") def test_resample(self, dtype): array1 = ( np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK @@ -4150,29 +4193,18 @@ def test_resample(self, dtype): func = method("resample", time="6m") expected = attach_units(func(strip_units(ds)).mean(), units) - result = func(ds).mean() + actual = func(ds).mean() - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "func", ( - pytest.param( - method("assign", c=lambda ds: 10 * ds.b), - marks=pytest.mark.xfail(reason="strips units"), - ), - pytest.param( - method("assign_coords", v=("x", np.arange(10) * unit_registry.s)), - marks=pytest.mark.xfail(reason="strips units"), - ), - pytest.param(method("first")), - pytest.param(method("last")), - pytest.param( - method("quantile", q=[0.25, 0.5, 0.75], dim="x"), - marks=pytest.mark.xfail( - reason="dataset groupby does not implement quantile" - ), - ), + method("assign", c=lambda ds: 10 * ds.b), + method("assign_coords", v=("x", np.arange(10) * unit_registry.s)), + method("first"), + method("last"), + method("quantile", q=[0.25, 0.5, 0.75], dim="x"), ), ids=repr, ) @@ -4204,9 +4236,9 @@ def test_grouped_operations(self, func, dtype): expected = attach_units( func(strip_units(ds).groupby("y"), **stripped_kwargs), units ) - result = func(ds.groupby("y")) + actual = func(ds.groupby("y")) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) @pytest.mark.parametrize( "func", @@ -4220,7 +4252,7 @@ def test_grouped_operations(self, func, dtype): method("rename_dims", x="offset_x"), method("swap_dims", {"x": "x2"}), method("expand_dims", v=np.linspace(10, 20, 12) * unit_registry.s, axis=1), - method("drop_sel", labels="x"), + method("drop_vars", "x"), method("drop_dims", "z"), method("set_coords", names="c"), method("reset_coords", names="x2"), @@ -4252,26 +4284,25 @@ def test_content_manipulation(self, func, dtype): }, coords={"x": x, "y": y, "z": z, "x2": ("x", x2)}, ) - units = extract_units(ds) - units.update( - { + units = { + **extract_units(ds), + **{ "y2": unit_registry.mm, "x_mm": unit_registry.mm, "offset_x": unit_registry.m, "d": unit_registry.Pa, "temperature": unit_registry.degK, - } - ) + }, + } stripped_kwargs = { key: strip_units(value) for key, value in func.kwargs.items() } expected = attach_units(func(strip_units(ds), **stripped_kwargs), units) - result = func(ds) + actual = func(ds) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) - @pytest.mark.xfail(reason="blocked by reindex") @pytest.mark.parametrize( "unit,error", ( @@ -4284,7 +4315,16 @@ def test_content_manipulation(self, func, dtype): pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - @pytest.mark.parametrize("variant", ("data", 
"dims", "coords")) + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), + ) def test_merge(self, variant, unit, error, dtype): original_data_unit = unit_registry.m original_dim_unit = unit_registry.m @@ -4325,6 +4365,6 @@ def test_merge(self, variant, unit, error, dtype): converted = convert_units(right, units) expected = attach_units(strip_units(left).merge(strip_units(converted)), units) - result = left.merge(right) + actual = left.merge(right) - assert_equal_with_units(expected, result) + assert_equal_with_units(expected, actual) From bcf0d61442a5ea7a2bd990b9e7f18f8932376a25 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 10 Dec 2019 15:41:20 +0000 Subject: [PATCH 17/28] Add Facetgrid.row_labels & Facetgrid.col_labels (#3597) * Add Facetgrid.row_labels & Facetgrid.col_labels This allows labels to be changed later. * Update docs. --- doc/api.rst | 18 ++++++++++++++++++ doc/plotting.rst | 13 +++++++++++-- doc/whats-new.rst | 6 ++++++ xarray/plot/facetgrid.py | 40 +++++++++++++++++++++++++++------------ xarray/tests/test_plot.py | 21 ++++++++++++++++++++ 5 files changed, 84 insertions(+), 14 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index a1fae3deb03..8b523b7837c 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -625,7 +625,25 @@ Plotting plot.imshow plot.line plot.pcolormesh + +Faceting +-------- +.. autosummary:: + :toctree: generated/ + plot.FacetGrid + plot.FacetGrid.add_colorbar + plot.FacetGrid.add_legend + plot.FacetGrid.map + plot.FacetGrid.map_dataarray + plot.FacetGrid.map_dataarray_line + plot.FacetGrid.map_dataset + plot.FacetGrid.set_axis_labels + plot.FacetGrid.set_ticks + plot.FacetGrid.set_titles + plot.FacetGrid.set_xlabels + plot.FacetGrid.set_ylabels + Testing ======= diff --git a/doc/plotting.rst b/doc/plotting.rst index 270988b99de..d77a170ce85 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst @@ -487,6 +487,7 @@ Faceting here refers to splitting an array along one or two dimensions and plotting each group. xarray's basic plotting is useful for plotting two dimensional arrays. What about three or four dimensional arrays? That's where facets become helpful. +The general approach to plotting here is called “small multiples”, where the same kind of plot is repeated multiple times, and the specific use of small multiples to display the same relationship conditioned on one ore more other variables is often called a “trellis plot”. Consider the temperature data set. There are 4 observations per day for two years which makes for 2920 values along the time dimension. @@ -572,8 +573,9 @@ Faceted plotting supports other arguments common to xarray 2d plots. FacetGrid Objects =================== -:py:class:`xarray.plot.FacetGrid` is used to control the behavior of the -multiple plots. +The object returned, ``g`` in the above examples, is a :py:class:`~xarray.plot.FacetGrid`` object +that links a :py:class:`DataArray` to a matplotlib figure with a particular structure. +This object can be used to control the behavior of the multiple plots. It borrows an API and code from `Seaborn's FacetGrid `_. The structure is contained within the ``axes`` and ``name_dicts`` @@ -609,6 +611,13 @@ they have been plotted. @savefig plot_facet_iterator.png plt.draw() + +:py:class:`~xarray.FacetGrid` objects have methods that let you customize the automatically generated +axis labels, axis ticks and plot titles. 
+See :py:meth:`~xarray.plot.FacetGrid.set_titles`,
+:py:meth:`~xarray.plot.FacetGrid.set_xlabels`, :py:meth:`~xarray.plot.FacetGrid.set_ylabels` and
+:py:meth:`~xarray.plot.FacetGrid.set_ticks` for more information.
+Plotting functions can be applied to each subset of the data by calling
+:py:meth:`~xarray.plot.FacetGrid.map_dataarray` or to each subplot by calling
+:py:meth:`~xarray.plot.FacetGrid.map`.
+
 TODO: add an example of using the ``map`` method to plot dataset variables
 (e.g., with ``plt.quiver``).

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 0a3406c3ebe..aa67a46c38e 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -39,6 +39,12 @@ Bug fixes
   `_.
 - Fix plotting with transposed 2D non-dimensional coordinates. (:issue:`3138`, :pull:`3441`)
   By `Deepak Cherian `_.
+- :py:meth:`~xarray.plot.FacetGrid.set_titles` can now replace existing row titles of a
+  :py:class:`~xarray.plot.FacetGrid` plot. In addition, :py:class:`~xarray.plot.FacetGrid` gained
+  two new attributes: :py:attr:`~xarray.plot.FacetGrid.col_labels` and
+  :py:attr:`~xarray.plot.FacetGrid.row_labels`, which contain matplotlib Text handles for both
+  column and row labels. These can be used to manually change the labels.
+  By `Deepak Cherian `_.
 - Fix issue with Dask-backed datasets raising a ``KeyError`` on some computations involving
   ``map_blocks`` (:pull:`3598`)
   By `Tom Augspurger `_.

diff --git a/xarray/plot/facetgrid.py b/xarray/plot/facetgrid.py
index 7f13ba601fe..4f3268c1203 100644
--- a/xarray/plot/facetgrid.py
+++ b/xarray/plot/facetgrid.py
@@ -61,6 +61,10 @@ class FacetGrid:
     axes : numpy object array
         Contains axes in corresponding position, as returned from
         plt.subplots
+    col_labels : list
+        list of :class:`matplotlib.text.Text` instances corresponding to column titles.
+    row_labels : list
+        list of :class:`matplotlib.text.Text` instances corresponding to row titles.
fig : matplotlib.Figure The figure containing all the axes name_dicts : numpy object array @@ -200,6 +204,8 @@ def __init__( self._ncol = ncol self._col_var = col self._col_wrap = col_wrap + self.row_labels = [None] * nrow + self.col_labels = [None] * ncol self._x_var = None self._y_var = None self._cmap_extend = None @@ -482,22 +488,32 @@ def set_titles(self, template="{coord} = {value}", maxchar=30, size=None, **kwar ax.set_title(title, size=size, **kwargs) else: # The row titles on the right edge of the grid - for ax, row_name in zip(self.axes[:, -1], self.row_names): + for index, (ax, row_name, handle) in enumerate( + zip(self.axes[:, -1], self.row_names, self.row_labels) + ): title = nicetitle(coord=self._row_var, value=row_name, maxchar=maxchar) - ax.annotate( - title, - xy=(1.02, 0.5), - xycoords="axes fraction", - rotation=270, - ha="left", - va="center", - **kwargs, - ) + if not handle: + self.row_labels[index] = ax.annotate( + title, + xy=(1.02, 0.5), + xycoords="axes fraction", + rotation=270, + ha="left", + va="center", + **kwargs, + ) + else: + handle.set_text(title) # The column titles on the top row - for ax, col_name in zip(self.axes[0, :], self.col_names): + for index, (ax, col_name, handle) in enumerate( + zip(self.axes[0, :], self.col_names, self.col_labels) + ): title = nicetitle(coord=self._col_var, value=col_name, maxchar=maxchar) - ax.set_title(title, size=size, **kwargs) + if not handle: + self.col_labels[index] = ax.set_title(title, size=size, **kwargs) + else: + handle.set_text(title) return self diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index a10f0d9a67e..a5402d88f3e 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -62,6 +62,15 @@ def substring_in_axes(substring, ax): return False +def substring_not_in_axes(substring, ax): + """ + Return True if a substring is not found anywhere in an axes + """ + alltxt = {t.get_text() for t in ax.findobj(mpl.text.Text)} + check = [(substring not in txt) for txt in alltxt] + return all(check) + + def easy_array(shape, start=0, stop=1): """ Make an array with desired shape using np.linspace @@ -1776,6 +1785,18 @@ def test_default_labels(self): for label, ax in zip(self.darray.coords["col"].values, g.axes[0, :]): assert substring_in_axes(label, ax) + # ensure that row & col labels can be changed + g.set_titles("abc={value}") + for label, ax in zip(self.darray.coords["row"].values, g.axes[:, -1]): + assert substring_in_axes(f"abc={label}", ax) + # previous labels were "row=row0" etc. + assert substring_not_in_axes("row=", ax) + + for label, ax in zip(self.darray.coords["col"].values, g.axes[0, :]): + assert substring_in_axes(f"abc={label}", ax) + # previous labels were "col=row0" etc. + assert substring_not_in_axes("col=", ax) + @pytest.mark.filterwarnings("ignore:tight_layout cannot") class TestFacetedLinePlotsLegend(PlotTestCase): From 2ee89c36a7ed512a221d38e09e7637429ced22d3 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 10 Dec 2019 09:02:00 -0700 Subject: [PATCH 18/28] Respect user-specified coordinates attribute. (#3487) * Respect user-specified coordinates attribute. * Add whats-new * Better if statement. * maybe it's better to not raise an error if "coordinates" in attrs. * tweak whats-new. * more thorough test. * Emit one "coordinates" warning per dataset, instead of one per variable. Also add stacklevel * Preserve attrs["coordinates"] when roundtripping with decode_coords=False * Avoid raising warnings. 
* fix whats-new

* [minor] add comments

* fix whats-new

* Actually test global "coordinates" handling.

* filterwarning not necessary.

* Add comment

* fix whats-new
---
 doc/io.rst                       | 16 ++++++++++++-
 doc/whats-new.rst                |  3 +++
 xarray/conventions.py            | 40 +++++++++++++++++++++-----------
 xarray/tests/test_backends.py    | 37 +++++++++++++++++++++++++++++--
 xarray/tests/test_conventions.py | 14 +++++++++++
 5 files changed, 93 insertions(+), 17 deletions(-)

diff --git a/doc/io.rst b/doc/io.rst
index 8f8a776f73a..2e50e5639da 100644
--- a/doc/io.rst
+++ b/doc/io.rst
@@ -437,9 +437,23 @@ like ``'days'`` for ``timedelta64`` data. ``calendar`` should be one of the cale
 supported by netCDF4-python: 'standard', 'gregorian', 'proleptic_gregorian',
 'noleap', '365_day', '360_day', 'julian', 'all_leap', '366_day'.

-By default, xarray uses the 'proleptic_gregorian' calendar and units of the smallest time
+By default, xarray uses the ``'proleptic_gregorian'`` calendar and units of the smallest time
 difference between values, with a reference time of the first time value.

+
+.. _io.coordinates:
+
+Coordinates
+...........
+
+You can control the ``coordinates`` attribute written to disk by specifying ``DataArray.encoding["coordinates"]``.
+If not specified, xarray automatically sets ``DataArray.encoding["coordinates"]`` to a space-delimited list
+of names of coordinate variables that share dimensions with the ``DataArray`` being written.
+This allows perfect roundtripping of xarray datasets but may not be desirable.
+When an xarray ``Dataset`` contains non-dimensional coordinates that do not share dimensions with any of
+the variables, these coordinate variable names are saved under a "global" ``"coordinates"`` attribute.
+This is not CF-compliant but again facilitates roundtripping of xarray datasets.
+
 Invalid netCDF files
 ~~~~~~~~~~~~~~~~~~~~

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index aa67a46c38e..1f60d457432 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -148,6 +148,9 @@
   invoked. (:issue:`3378`, :pull:`3446`, :pull:`3515`)
   By `Deepak Cherian `_ and `Guido Imperiale `_.

+- xarray now respects the ``DataArray.encoding["coordinates"]`` attribute when writing to disk.
+  See :ref:`io.coordinates` for more details. (:issue:`3351`, :pull:`3487`)
+  By `Deepak Cherian `_.
 - Add the documented-but-missing :py:meth:`DatasetGroupBy.quantile`.
   (:issue:`3525`, :pull:`3527`).
   By `Justus Magin `_.
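A minimal sketch of the behavior described in :ref:`io.coordinates` above (the dataset,
values, and file name are placeholders; only ``encoding["coordinates"]`` is the feature
added by this patch):

    import numpy as np
    import xarray as xr

    # "temp" shares dimensions with both "lat" and "lon", so by default
    # xarray would write coordinates="lat lon" into the netCDF attribute
    ds = xr.Dataset(
        {"temp": (("y", "x"), np.zeros((2, 3)))},
        coords={
            "lat": (("y", "x"), np.ones((2, 3))),
            "lon": (("y", "x"), np.ones((2, 3))),
        },
    )

    # override the attribute written to disk, as the tests below do
    ds["temp"].encoding["coordinates"] = "lat"
    ds.to_netcdf("temp.nc")  # placeholder path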
diff --git a/xarray/conventions.py b/xarray/conventions.py index a83b4b31c17..a8b9906c153 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -5,7 +5,7 @@ import pandas as pd from .coding import strings, times, variables -from .coding.variables import SerializationWarning +from .coding.variables import SerializationWarning, pop_to from .core import duck_array_ops, indexing from .core.common import contains_cftime_datetimes from .core.pycompat import dask_array_type @@ -660,34 +660,46 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): and set(target_dims) <= set(v.dims) ): variable_coordinates[k].add(coord_name) - global_coordinates.discard(coord_name) variables = {k: v.copy(deep=False) for k, v in variables.items()} - # These coordinates are saved according to CF conventions - for var_name, coord_names in variable_coordinates.items(): - attrs = variables[var_name].attrs - if "coordinates" in attrs: + # keep track of variable names written to file under the "coordinates" attributes + written_coords = set() + for name, var in variables.items(): + encoding = var.encoding + attrs = var.attrs + if "coordinates" in attrs and "coordinates" in encoding: raise ValueError( - "cannot serialize coordinates because variable " - "%s already has an attribute 'coordinates'" % var_name + f"'coordinates' found in both attrs and encoding for variable {name!r}." ) - attrs["coordinates"] = " ".join(map(str, coord_names)) + + # this will copy coordinates from encoding to attrs if "coordinates" in attrs + # after the next line, "coordinates" is never in encoding + # we get support for attrs["coordinates"] for free. + coords_str = pop_to(encoding, attrs, "coordinates") + if not coords_str and variable_coordinates[name]: + attrs["coordinates"] = " ".join(map(str, variable_coordinates[name])) + if "coordinates" in attrs: + written_coords.update(attrs["coordinates"].split()) # These coordinates are not associated with any particular variables, so we # save them under a global 'coordinates' attribute so xarray can roundtrip # the dataset faithfully. Because this serialization goes beyond CF # conventions, only do it if necessary. # Reference discussion: - # http://mailman.cgd.ucar.edu/pipermail/cf-metadata/2014/057771.html + # http://mailman.cgd.ucar.edu/pipermail/cf-metadata/2014/007571.html + global_coordinates.difference_update(written_coords) if global_coordinates: attributes = dict(attributes) if "coordinates" in attributes: - raise ValueError( - "cannot serialize coordinates because the global " - "attribute 'coordinates' already exists" + warnings.warn( + f"cannot serialize global coordinates {global_coordinates!r} because the global " + f"attribute 'coordinates' already exists. 
This may prevent faithful roundtripping "
+                f"of xarray datasets",
+                SerializationWarning,
             )
-        attributes["coordinates"] = " ".join(map(str, global_coordinates))
+        else:
+            attributes["coordinates"] = " ".join(map(str, global_coordinates))

     return variables, attributes

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 1e135ebd3e1..a23527bd49a 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -33,6 +33,7 @@
 from xarray.backends.netCDF4_ import _extract_nc4_variable_encoding
 from xarray.backends.pydap_ import PydapDataStore
 from xarray.coding.variables import SerializationWarning
+from xarray.conventions import encode_dataset_coordinates
 from xarray.core import indexing
 from xarray.core.options import set_options
 from xarray.core.pycompat import dask_array_type
@@ -522,15 +523,35 @@ def test_roundtrip_coordinates(self):
         with self.roundtrip(original) as actual:
             assert_identical(original, actual)

+        original["foo"].encoding["coordinates"] = "y"
+        with self.roundtrip(original, open_kwargs={"decode_coords": False}) as expected:
+            # check roundtripping when decode_coords=False
+            with self.roundtrip(
+                expected, open_kwargs={"decode_coords": False}
+            ) as actual:
+                assert_identical(expected, actual)
+
     def test_roundtrip_global_coordinates(self):
-        original = Dataset({"x": [2, 3], "y": ("a", [42]), "z": ("x", [4, 5])})
+        original = Dataset(
+            {"foo": ("x", [0, 1])}, {"x": [2, 3], "y": ("a", [42]), "z": ("x", [4, 5])}
+        )
         with self.roundtrip(original) as actual:
             assert_identical(original, actual)

+        # test that global "coordinates" is as expected
+        _, attrs = encode_dataset_coordinates(original)
+        assert attrs["coordinates"] == "y"
+
+        # test warning when global "coordinates" is already set
+        original.attrs["coordinates"] = "foo"
+        with pytest.warns(SerializationWarning):
+            _, attrs = encode_dataset_coordinates(original)
+            assert attrs["coordinates"] == "foo"
+
     def test_roundtrip_coordinates_with_space(self):
         original = Dataset(coords={"x": 0, "y z": 1})
         expected = Dataset({"y z": 1}, {"x": 0})
-        with pytest.warns(xr.SerializationWarning):
+        with pytest.warns(SerializationWarning):
             with self.roundtrip(original) as actual:
                 assert_identical(expected, actual)

@@ -810,6 +831,18 @@ def equals_latlon(obj):
                 assert "coordinates" not in ds["lat"].attrs
                 assert "coordinates" not in ds["lon"].attrs

+        original["temp"].encoding["coordinates"] = "lat"
+        with self.roundtrip(original) as actual:
+            assert_identical(actual, original)
+        original["precip"].encoding["coordinates"] = "lat"
+        with create_tmp_file() as tmp_file:
+            original.to_netcdf(tmp_file)
+            with open_dataset(tmp_file, decode_coords=True) as ds:
+                assert "lon" not in ds["temp"].encoding["coordinates"]
+                assert "lon" not in ds["precip"].encoding["coordinates"]
+                assert "coordinates" not in ds["lat"].encoding
+                assert "coordinates" not in ds["lon"].encoding
+
     def test_roundtrip_endian(self):
         ds = Dataset(
             {
diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py
index 09002e252b4..acb2400ea04 100644
--- a/xarray/tests/test_conventions.py
+++ b/xarray/tests/test_conventions.py
@@ -136,6 +136,20 @@ def test_multidimensional_coordinates(self):
         # Should not have any global coordinates.
assert "coordinates" not in attrs + def test_do_not_overwrite_user_coordinates(self): + orig = Dataset( + coords={"x": [0, 1, 2], "y": ("x", [5, 6, 7]), "z": ("x", [8, 9, 10])}, + data_vars={"a": ("x", [1, 2, 3]), "b": ("x", [3, 5, 6])}, + ) + orig["a"].encoding["coordinates"] = "y" + orig["b"].encoding["coordinates"] = "z" + enc, _ = conventions.encode_dataset_coordinates(orig) + assert enc["a"].attrs["coordinates"] == "y" + assert enc["b"].attrs["coordinates"] == "z" + orig["a"].attrs["coordinates"] = "foo" + with raises_regex(ValueError, "'coordinates' found in both attrs"): + conventions.encode_dataset_coordinates(orig) + @requires_dask def test_string_object_warning(self): original = Variable(("x",), np.array(["foo", "bar"], dtype=object)).chunk() From 23d76b44cb879337f2e6c33c3f32bdd12f0efbe9 Mon Sep 17 00:00:00 2001 From: Gerardo Rivera Date: Thu, 12 Dec 2019 13:48:50 -0500 Subject: [PATCH 19/28] Minor doc fixes (#3615) --- xarray/core/dataarray.py | 10 +++++----- xarray/core/dataset.py | 12 ++++++------ xarray/core/variable.py | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b649df6dd56..31cd3c713f6 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1115,7 +1115,7 @@ def thin( **indexers_kwargs: Any, ) -> "DataArray": """Return a new DataArray whose data is given by each `n` value - along the specified dimension(s). Default `n` = 5 + along the specified dimension(s). See Also -------- @@ -1289,7 +1289,7 @@ def reindex( satisfy the equation ``abs(index[indexer] - target) <= tolerance``. fill_value : scalar, optional Value to use for newly missing values - **indexers_kwarg : {dim: indexer, ...}, optional + **indexers_kwargs : {dim: indexer, ...}, optional The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. @@ -1338,7 +1338,7 @@ def interp( values. kwargs: dictionary Additional keyword passed to scipy's interpolator. - ``**coords_kwarg`` : {dim: coordinate, ...}, optional + ``**coords_kwargs`` : {dim: coordinate, ...}, optional The keyword arguments form of ``coords``. One of coords or coords_kwargs must be provided. @@ -2740,7 +2740,7 @@ def shift( Value to use for newly missing values **shifts_kwargs: The keyword arguments form of ``shifts``. - One of shifts or shifts_kwarg must be provided. + One of shifts or shifts_kwargs must be provided. Returns ------- @@ -2791,7 +2791,7 @@ def roll( deprecated and will change to False in a future version. Explicitly pass roll_coords to silence the warning. **shifts_kwargs : The keyword arguments form of ``shifts``. - One of shifts or shifts_kwarg must be provided. + One of shifts or shifts_kwargs must be provided. Returns ------- diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index cc821becd6f..6be06fed117 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1887,7 +1887,7 @@ def isel( drop : bool, optional If ``drop=True``, drop coordinates variables indexed by integers instead of making them scalar. - **indexers_kwarg : {dim: indexer, ...}, optional + **indexers_kwargs : {dim: indexer, ...}, optional The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. @@ -2033,7 +2033,7 @@ def sel( drop : bool, optional If ``drop=True``, drop coordinates variables in `indexers` instead of making them scalar. 
-        **indexers_kwarg : {dim: indexer, ...}, optional
+        **indexers_kwargs : {dim: indexer, ...}, optional
             The keyword arguments form of ``indexers``.
             One of indexers or indexers_kwargs must be provided.
 
@@ -2168,7 +2168,7 @@ def thin(
 
         Parameters
         ----------
-        indexers : dict or int, default: 5
+        indexers : dict or int
             A dict with keys matching dimensions and integer values `n`
             or a single integer `n` applied over all dimensions.
             One of indexers or indexers_kwargs must be provided.
@@ -2332,7 +2332,7 @@ def reindex(
         fill_value : scalar, optional
             Value to use for newly missing values
         sparse: use sparse-array. By default, False
-        **indexers_kwarg : {dim: indexer, ...}, optional
+        **indexers_kwargs : {dim: indexer, ...}, optional
             Keyword arguments in the same form as ``indexers``.
             One of indexers or indexers_kwargs must be provided.
@@ -2547,7 +2547,7 @@ def interp(
             values.
         kwargs: dictionary, optional
             Additional keyword passed to scipy's interpolator.
-        **coords_kwarg : {dim: coordinate, ...}, optional
+        **coords_kwargs : {dim: coordinate, ...}, optional
             The keyword arguments form of ``coords``.
             One of coords or coords_kwargs must be provided.
@@ -4938,7 +4938,7 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs):
             Value to use for newly missing values
         **shifts_kwargs:
             The keyword arguments form of ``shifts``.
-            One of shifts or shifts_kwarg must be provided.
+            One of shifts or shifts_kwargs must be provided.
 
         Returns
         -------
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index aa04cffb5ea..ac4b367f66d 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -1137,7 +1137,7 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs):
             Value to use for newly missing values
         **shifts_kwargs:
             The keyword arguments form of ``shifts``.
-            One of shifts or shifts_kwarg must be provided.
+            One of shifts or shifts_kwargs must be provided.
 
         Returns
         -------
@@ -1245,7 +1245,7 @@ def roll(self, shifts=None, **shifts_kwargs):
             left.
         **shifts_kwargs:
             The keyword arguments form of ``shifts``.
-            One of shifts or shifts_kwarg must be provided.
+            One of shifts or shifts_kwargs must be provided.
 
         Returns
         -------
From f2b2f9f62ea0f1020262a7ff563bfe74258ffaa1 Mon Sep 17 00:00:00 2001
From: Dan Allan
Date: Fri, 13 Dec 2019 11:05:23 -0500
Subject: [PATCH 20/28] Provide shape info in shape mismatch error. (#3619)

* Provide shape info in shape mismatch error.

* Reword error message.

Co-Authored-By: Deepak Cherian
---
 xarray/core/variable.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index ac4b367f66d..17ecdf62730 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -346,7 +346,10 @@ def data(self):
     def data(self, data):
         data = as_compatible_data(data)
         if data.shape != self.shape:
-            raise ValueError("replacement data must match the Variable's shape")
+            raise ValueError(
+                f"replacement data must match the Variable's shape. "
+                f"replacement data has shape {data.shape}; Variable has shape {self.shape}"
+            )
         self._data = data
 
     def load(self, **kwargs):
From 6295bc6bca1559680544ea86051f35fa2d367fe1 Mon Sep 17 00:00:00 2001
From: Mathias Hauser
Date: Tue, 17 Dec 2019 14:50:05 +0100
Subject: [PATCH 21/28] Fix/quantile wrong errmsg (#3635)

* test correct error is thrown

* quantile: throw out of bounds error

* whats-new
---
 doc/whats-new.rst             | 2 ++
 xarray/core/variable.py       | 6 ++++++
 xarray/tests/test_variable.py | 8 ++++++++
 3 files changed, 16 insertions(+)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 1f60d457432..44bff9e7202 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -47,6 +47,8 @@ Bug fixes
   By `Deepak Cherian `_.
 - Fix issue with Dask-backed datasets raising a ``KeyError`` on some computations involving ``map_blocks`` (:pull:`3598`)
   By `Tom Augspurger `_.
+- Ensure :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile` issue the correct error
+  when ``q`` is out of bounds (:issue:`3634`) by `Mathias Hauser `_.
 
 Documentation
 ~~~~~~~~~~~~~
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index 17ecdf62730..4474d973f59 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -1731,6 +1731,10 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None):
         scalar = utils.is_scalar(q)
         q = np.atleast_1d(np.asarray(q, dtype=np.float64))
 
+        # TODO: remove once numpy >= 1.15.0 is the minimum requirement
+        if np.count_nonzero(q < 0.0) or np.count_nonzero(q > 1.0):
+            raise ValueError("Quantiles must be in the range [0, 1]")
+
         if dim is None:
             dim = self.dims
 
@@ -1739,6 +1743,8 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None):
 
         def _wrapper(npa, **kwargs):
             # move quantile axis to end. required for apply_ufunc
+
+            # TODO: use np.nanquantile once numpy >= 1.15.0 is the minimum requirement
             return np.moveaxis(np.nanpercentile(npa, **kwargs), 0, -1)
 
         axis = np.arange(-1, -1 * len(dim) - 1, -1)
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index 1d83e16a5bd..49a6906d5be 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -1542,6 +1542,14 @@ def test_quantile_chunked_dim_error(self):
         with raises_regex(ValueError, "dimension 'x'"):
             v.quantile(0.5, dim="x")
 
+    @pytest.mark.parametrize("q", [-0.1, 1.1, [2], [0.25, 2]])
+    def test_quantile_out_of_bounds(self, q):
+        v = Variable(["x", "y"], self.d)
+
+        # escape special characters
+        with raises_regex(ValueError, r"Quantiles must be in the range \[0, 1\]"):
+            v.quantile(q, dim="x")
+
     @requires_dask
     @requires_bottleneck
     def test_rank_dask_raises(self):
From 6ad59b93f814b48053b1a9eea61d7c43517105cb Mon Sep 17 00:00:00 2001
From: keewis
Date: Tue, 17 Dec 2019 17:25:26 +0100
Subject: [PATCH 22/28] silence sphinx warnings round 3 (#3602)

* fix the deprecated section and update links to drop
* link to interp_like instead of interpolate_like
* update links in the manually written parts of the documentation
* add missing methods for DatasetGroupBy, DataArrayGroupBy and Variable
* update references in whats-new.rst
* fix a few mistakes in the reference targets
* add missing methods for Data*Rolling, Data*GroupBy and Data*Resample
* add all CFTimeIndex methods
* fix a few more broken links in whats-new.rst
* remove documentation links for some non-public methods / functions
* add missing methods to Data*Coarsen
* move the coarsen objects into their own section in api.rst
* use currentmodule instead of prefixing with ~xarray
* add a new tutorial
section * add show_versions and set_options * add FacetGrid to api.rst and update links * use plot.line instead of plot.plot * add the CFTimeIndex properties to api-hidden.rst * add the backend objects' methods to api-hidden.rst * add missing dict methods to api.rst * add the coordinates objects to api.rst * add the data store properties to api-hidden.rst * add IndexVariable methods and properties to api-hidden.rst * add properties for *Coarsen, *GroupBy, *Resample and *Rolling to api-hidden.rst * add IndexVariable.get_level_variable to api-hidden.rst * add the accessor methods / properties to api-hidden.rst * add the RollingExp method to api-hidden.rst * fix the docstring of StringAccessor.replace * mention load_store instead of from_store and generate a page for dump_to_store * also add load_store --- doc/api-hidden.rst | 653 +++++++++++++++++++++++++++++++++- doc/api.rst | 37 +- doc/groupby.rst | 6 +- doc/howdoi.rst | 4 +- doc/indexing.rst | 2 +- doc/interpolation.rst | 4 +- doc/io.rst | 86 ++--- doc/pandas.rst | 23 +- doc/plotting.rst | 19 +- doc/terminology.rst | 1 + doc/whats-new.rst | 50 +-- xarray/core/accessor_str.py | 4 +- xarray/core/resample.py | 4 + xarray/core/variable.py | 2 + xarray/util/print_versions.py | 7 + 15 files changed, 801 insertions(+), 101 deletions(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 027c732697f..c117b0f4fc7 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -27,6 +27,38 @@ Dataset.std Dataset.var + core.coordinates.DatasetCoordinates.get + core.coordinates.DatasetCoordinates.items + core.coordinates.DatasetCoordinates.keys + core.coordinates.DatasetCoordinates.merge + core.coordinates.DatasetCoordinates.to_dataset + core.coordinates.DatasetCoordinates.to_index + core.coordinates.DatasetCoordinates.update + core.coordinates.DatasetCoordinates.values + core.coordinates.DatasetCoordinates.dims + core.coordinates.DatasetCoordinates.indexes + core.coordinates.DatasetCoordinates.variables + + core.rolling.DatasetCoarsen.all + core.rolling.DatasetCoarsen.any + core.rolling.DatasetCoarsen.argmax + core.rolling.DatasetCoarsen.argmin + core.rolling.DatasetCoarsen.count + core.rolling.DatasetCoarsen.max + core.rolling.DatasetCoarsen.mean + core.rolling.DatasetCoarsen.median + core.rolling.DatasetCoarsen.min + core.rolling.DatasetCoarsen.prod + core.rolling.DatasetCoarsen.std + core.rolling.DatasetCoarsen.sum + core.rolling.DatasetCoarsen.var + core.rolling.DatasetCoarsen.boundary + core.rolling.DatasetCoarsen.coord_func + core.rolling.DatasetCoarsen.obj + core.rolling.DatasetCoarsen.side + core.rolling.DatasetCoarsen.trim_excess + core.rolling.DatasetCoarsen.windows + core.groupby.DatasetGroupBy.assign core.groupby.DatasetGroupBy.assign_coords core.groupby.DatasetGroupBy.first @@ -34,6 +66,69 @@ core.groupby.DatasetGroupBy.fillna core.groupby.DatasetGroupBy.quantile core.groupby.DatasetGroupBy.where + core.groupby.DatasetGroupBy.all + core.groupby.DatasetGroupBy.any + core.groupby.DatasetGroupBy.argmax + core.groupby.DatasetGroupBy.argmin + core.groupby.DatasetGroupBy.count + core.groupby.DatasetGroupBy.max + core.groupby.DatasetGroupBy.mean + core.groupby.DatasetGroupBy.median + core.groupby.DatasetGroupBy.min + core.groupby.DatasetGroupBy.prod + core.groupby.DatasetGroupBy.std + core.groupby.DatasetGroupBy.sum + core.groupby.DatasetGroupBy.var + core.groupby.DatasetGroupBy.dims + core.groupby.DatasetGroupBy.groups + + core.resample.DatasetResample.all + core.resample.DatasetResample.any + core.resample.DatasetResample.apply + 
core.resample.DatasetResample.argmax + core.resample.DatasetResample.argmin + core.resample.DatasetResample.assign + core.resample.DatasetResample.assign_coords + core.resample.DatasetResample.bfill + core.resample.DatasetResample.count + core.resample.DatasetResample.ffill + core.resample.DatasetResample.fillna + core.resample.DatasetResample.first + core.resample.DatasetResample.last + core.resample.DatasetResample.map + core.resample.DatasetResample.max + core.resample.DatasetResample.mean + core.resample.DatasetResample.median + core.resample.DatasetResample.min + core.resample.DatasetResample.prod + core.resample.DatasetResample.quantile + core.resample.DatasetResample.reduce + core.resample.DatasetResample.std + core.resample.DatasetResample.sum + core.resample.DatasetResample.var + core.resample.DatasetResample.where + core.resample.DatasetResample.dims + core.resample.DatasetResample.groups + + core.rolling.DatasetRolling.argmax + core.rolling.DatasetRolling.argmin + core.rolling.DatasetRolling.count + core.rolling.DatasetRolling.max + core.rolling.DatasetRolling.mean + core.rolling.DatasetRolling.median + core.rolling.DatasetRolling.min + core.rolling.DatasetRolling.prod + core.rolling.DatasetRolling.std + core.rolling.DatasetRolling.sum + core.rolling.DatasetRolling.var + core.rolling.DatasetRolling.center + core.rolling.DatasetRolling.dim + core.rolling.DatasetRolling.min_periods + core.rolling.DatasetRolling.obj + core.rolling.DatasetRolling.rollings + core.rolling.DatasetRolling.window + + core.rolling_exp.RollingExp.mean Dataset.argsort Dataset.astype @@ -47,6 +142,9 @@ Dataset.cumprod Dataset.rank + Dataset.load_store + Dataset.dump_to_store + DataArray.ndim DataArray.nbytes DataArray.shape @@ -71,12 +169,104 @@ DataArray.std DataArray.var + core.coordinates.DataArrayCoordinates.get + core.coordinates.DataArrayCoordinates.items + core.coordinates.DataArrayCoordinates.keys + core.coordinates.DataArrayCoordinates.merge + core.coordinates.DataArrayCoordinates.to_dataset + core.coordinates.DataArrayCoordinates.to_index + core.coordinates.DataArrayCoordinates.update + core.coordinates.DataArrayCoordinates.values + core.coordinates.DataArrayCoordinates.dims + core.coordinates.DataArrayCoordinates.indexes + core.coordinates.DataArrayCoordinates.variables + + core.rolling.DataArrayCoarsen.all + core.rolling.DataArrayCoarsen.any + core.rolling.DataArrayCoarsen.argmax + core.rolling.DataArrayCoarsen.argmin + core.rolling.DataArrayCoarsen.count + core.rolling.DataArrayCoarsen.max + core.rolling.DataArrayCoarsen.mean + core.rolling.DataArrayCoarsen.median + core.rolling.DataArrayCoarsen.min + core.rolling.DataArrayCoarsen.prod + core.rolling.DataArrayCoarsen.std + core.rolling.DataArrayCoarsen.sum + core.rolling.DataArrayCoarsen.var + core.rolling.DataArrayCoarsen.boundary + core.rolling.DataArrayCoarsen.coord_func + core.rolling.DataArrayCoarsen.obj + core.rolling.DataArrayCoarsen.side + core.rolling.DataArrayCoarsen.trim_excess + core.rolling.DataArrayCoarsen.windows + core.groupby.DataArrayGroupBy.assign_coords core.groupby.DataArrayGroupBy.first core.groupby.DataArrayGroupBy.last core.groupby.DataArrayGroupBy.fillna core.groupby.DataArrayGroupBy.quantile core.groupby.DataArrayGroupBy.where + core.groupby.DataArrayGroupBy.all + core.groupby.DataArrayGroupBy.any + core.groupby.DataArrayGroupBy.argmax + core.groupby.DataArrayGroupBy.argmin + core.groupby.DataArrayGroupBy.count + core.groupby.DataArrayGroupBy.max + core.groupby.DataArrayGroupBy.mean + 
core.groupby.DataArrayGroupBy.median + core.groupby.DataArrayGroupBy.min + core.groupby.DataArrayGroupBy.prod + core.groupby.DataArrayGroupBy.std + core.groupby.DataArrayGroupBy.sum + core.groupby.DataArrayGroupBy.var + core.groupby.DataArrayGroupBy.dims + core.groupby.DataArrayGroupBy.groups + + core.resample.DataArrayResample.all + core.resample.DataArrayResample.any + core.resample.DataArrayResample.apply + core.resample.DataArrayResample.argmax + core.resample.DataArrayResample.argmin + core.resample.DataArrayResample.assign_coords + core.resample.DataArrayResample.bfill + core.resample.DataArrayResample.count + core.resample.DataArrayResample.ffill + core.resample.DataArrayResample.fillna + core.resample.DataArrayResample.first + core.resample.DataArrayResample.last + core.resample.DataArrayResample.map + core.resample.DataArrayResample.max + core.resample.DataArrayResample.mean + core.resample.DataArrayResample.median + core.resample.DataArrayResample.min + core.resample.DataArrayResample.prod + core.resample.DataArrayResample.quantile + core.resample.DataArrayResample.reduce + core.resample.DataArrayResample.std + core.resample.DataArrayResample.sum + core.resample.DataArrayResample.var + core.resample.DataArrayResample.where + core.resample.DataArrayResample.dims + core.resample.DataArrayResample.groups + + core.rolling.DataArrayRolling.argmax + core.rolling.DataArrayRolling.argmin + core.rolling.DataArrayRolling.count + core.rolling.DataArrayRolling.max + core.rolling.DataArrayRolling.mean + core.rolling.DataArrayRolling.median + core.rolling.DataArrayRolling.min + core.rolling.DataArrayRolling.prod + core.rolling.DataArrayRolling.std + core.rolling.DataArrayRolling.sum + core.rolling.DataArrayRolling.var + core.rolling.DataArrayRolling.center + core.rolling.DataArrayRolling.dim + core.rolling.DataArrayRolling.min_periods + core.rolling.DataArrayRolling.obj + core.rolling.DataArrayRolling.window + core.rolling.DataArrayRolling.window_labels DataArray.argsort DataArray.clip @@ -91,6 +281,221 @@ DataArray.cumprod DataArray.rank + core.accessor_dt.DatetimeAccessor.ceil + core.accessor_dt.DatetimeAccessor.floor + core.accessor_dt.DatetimeAccessor.round + core.accessor_dt.DatetimeAccessor.strftime + core.accessor_dt.DatetimeAccessor.day + core.accessor_dt.DatetimeAccessor.dayofweek + core.accessor_dt.DatetimeAccessor.dayofyear + core.accessor_dt.DatetimeAccessor.days_in_month + core.accessor_dt.DatetimeAccessor.daysinmonth + core.accessor_dt.DatetimeAccessor.hour + core.accessor_dt.DatetimeAccessor.microsecond + core.accessor_dt.DatetimeAccessor.minute + core.accessor_dt.DatetimeAccessor.month + core.accessor_dt.DatetimeAccessor.nanosecond + core.accessor_dt.DatetimeAccessor.quarter + core.accessor_dt.DatetimeAccessor.season + core.accessor_dt.DatetimeAccessor.second + core.accessor_dt.DatetimeAccessor.time + core.accessor_dt.DatetimeAccessor.week + core.accessor_dt.DatetimeAccessor.weekday + core.accessor_dt.DatetimeAccessor.weekday_name + core.accessor_dt.DatetimeAccessor.weekofyear + core.accessor_dt.DatetimeAccessor.year + + core.accessor_str.StringAccessor.capitalize + core.accessor_str.StringAccessor.center + core.accessor_str.StringAccessor.contains + core.accessor_str.StringAccessor.count + core.accessor_str.StringAccessor.decode + core.accessor_str.StringAccessor.encode + core.accessor_str.StringAccessor.endswith + core.accessor_str.StringAccessor.find + core.accessor_str.StringAccessor.get + core.accessor_str.StringAccessor.index + core.accessor_str.StringAccessor.isalnum + 
core.accessor_str.StringAccessor.isalpha + core.accessor_str.StringAccessor.isdecimal + core.accessor_str.StringAccessor.isdigit + core.accessor_str.StringAccessor.islower + core.accessor_str.StringAccessor.isnumeric + core.accessor_str.StringAccessor.isspace + core.accessor_str.StringAccessor.istitle + core.accessor_str.StringAccessor.isupper + core.accessor_str.StringAccessor.len + core.accessor_str.StringAccessor.ljust + core.accessor_str.StringAccessor.lower + core.accessor_str.StringAccessor.lstrip + core.accessor_str.StringAccessor.match + core.accessor_str.StringAccessor.pad + core.accessor_str.StringAccessor.repeat + core.accessor_str.StringAccessor.replace + core.accessor_str.StringAccessor.rfind + core.accessor_str.StringAccessor.rindex + core.accessor_str.StringAccessor.rjust + core.accessor_str.StringAccessor.rstrip + core.accessor_str.StringAccessor.slice + core.accessor_str.StringAccessor.slice_replace + core.accessor_str.StringAccessor.startswith + core.accessor_str.StringAccessor.strip + core.accessor_str.StringAccessor.swapcase + core.accessor_str.StringAccessor.title + core.accessor_str.StringAccessor.translate + core.accessor_str.StringAccessor.upper + core.accessor_str.StringAccessor.wrap + core.accessor_str.StringAccessor.zfill + + Variable.all + Variable.any + Variable.argmax + Variable.argmin + Variable.argsort + Variable.astype + Variable.broadcast_equals + Variable.chunk + Variable.clip + Variable.coarsen + Variable.compute + Variable.concat + Variable.conj + Variable.conjugate + Variable.copy + Variable.count + Variable.cumprod + Variable.cumsum + Variable.equals + Variable.fillna + Variable.get_axis_num + Variable.identical + Variable.isel + Variable.isnull + Variable.item + Variable.load + Variable.max + Variable.mean + Variable.median + Variable.min + Variable.no_conflicts + Variable.notnull + Variable.pad_with_fill_value + Variable.prod + Variable.quantile + Variable.rank + Variable.reduce + Variable.roll + Variable.rolling_window + Variable.round + Variable.searchsorted + Variable.set_dims + Variable.shift + Variable.squeeze + Variable.stack + Variable.std + Variable.sum + Variable.to_base_variable + Variable.to_coord + Variable.to_dict + Variable.to_index + Variable.to_index_variable + Variable.to_variable + Variable.transpose + Variable.unstack + Variable.var + Variable.where + Variable.T + Variable.attrs + Variable.chunks + Variable.data + Variable.dims + Variable.dtype + Variable.encoding + Variable.imag + Variable.nbytes + Variable.ndim + Variable.real + Variable.shape + Variable.size + Variable.sizes + Variable.values + + IndexVariable.all + IndexVariable.any + IndexVariable.argmax + IndexVariable.argmin + IndexVariable.argsort + IndexVariable.astype + IndexVariable.broadcast_equals + IndexVariable.chunk + IndexVariable.clip + IndexVariable.coarsen + IndexVariable.compute + IndexVariable.concat + IndexVariable.conj + IndexVariable.conjugate + IndexVariable.copy + IndexVariable.count + IndexVariable.cumprod + IndexVariable.cumsum + IndexVariable.equals + IndexVariable.fillna + IndexVariable.get_axis_num + IndexVariable.get_level_variable + IndexVariable.identical + IndexVariable.isel + IndexVariable.isnull + IndexVariable.item + IndexVariable.load + IndexVariable.max + IndexVariable.mean + IndexVariable.median + IndexVariable.min + IndexVariable.no_conflicts + IndexVariable.notnull + IndexVariable.pad_with_fill_value + IndexVariable.prod + IndexVariable.quantile + IndexVariable.rank + IndexVariable.reduce + IndexVariable.roll + 
IndexVariable.rolling_window + IndexVariable.round + IndexVariable.searchsorted + IndexVariable.set_dims + IndexVariable.shift + IndexVariable.squeeze + IndexVariable.stack + IndexVariable.std + IndexVariable.sum + IndexVariable.to_base_variable + IndexVariable.to_coord + IndexVariable.to_dict + IndexVariable.to_index + IndexVariable.to_index_variable + IndexVariable.to_variable + IndexVariable.transpose + IndexVariable.unstack + IndexVariable.var + IndexVariable.where + IndexVariable.T + IndexVariable.attrs + IndexVariable.chunks + IndexVariable.data + IndexVariable.dims + IndexVariable.dtype + IndexVariable.encoding + IndexVariable.imag + IndexVariable.level_names + IndexVariable.name + IndexVariable.nbytes + IndexVariable.ndim + IndexVariable.real + IndexVariable.shape + IndexVariable.size + IndexVariable.sizes + IndexVariable.values + ufuncs.angle ufuncs.arccos ufuncs.arccosh @@ -156,6 +561,252 @@ plot.FacetGrid.set_ticks plot.FacetGrid.map + CFTimeIndex.all + CFTimeIndex.any + CFTimeIndex.append + CFTimeIndex.argmax + CFTimeIndex.argmin + CFTimeIndex.argsort + CFTimeIndex.asof + CFTimeIndex.asof_locs + CFTimeIndex.astype + CFTimeIndex.contains + CFTimeIndex.copy + CFTimeIndex.delete + CFTimeIndex.difference + CFTimeIndex.drop + CFTimeIndex.drop_duplicates + CFTimeIndex.droplevel + CFTimeIndex.dropna + CFTimeIndex.duplicated + CFTimeIndex.equals + CFTimeIndex.factorize + CFTimeIndex.fillna + CFTimeIndex.format + CFTimeIndex.get_duplicates + CFTimeIndex.get_indexer + CFTimeIndex.get_indexer_for + CFTimeIndex.get_indexer_non_unique + CFTimeIndex.get_level_values + CFTimeIndex.get_loc + CFTimeIndex.get_slice_bound + CFTimeIndex.get_value + CFTimeIndex.get_values + CFTimeIndex.groupby + CFTimeIndex.holds_integer + CFTimeIndex.identical + CFTimeIndex.insert + CFTimeIndex.intersection + CFTimeIndex.is_ + CFTimeIndex.is_boolean + CFTimeIndex.is_categorical + CFTimeIndex.is_floating + CFTimeIndex.is_integer + CFTimeIndex.is_interval + CFTimeIndex.is_lexsorted_for_tuple + CFTimeIndex.is_mixed + CFTimeIndex.is_numeric + CFTimeIndex.is_object + CFTimeIndex.is_type_compatible + CFTimeIndex.isin + CFTimeIndex.isna + CFTimeIndex.isnull + CFTimeIndex.item + CFTimeIndex.join + CFTimeIndex.map + CFTimeIndex.max + CFTimeIndex.memory_usage + CFTimeIndex.min + CFTimeIndex.notna + CFTimeIndex.notnull + CFTimeIndex.nunique + CFTimeIndex.putmask + CFTimeIndex.ravel + CFTimeIndex.reindex + CFTimeIndex.rename + CFTimeIndex.repeat + CFTimeIndex.searchsorted + CFTimeIndex.set_names + CFTimeIndex.set_value CFTimeIndex.shift - CFTimeIndex.to_datetimeindex + CFTimeIndex.slice_indexer + CFTimeIndex.slice_locs + CFTimeIndex.sort + CFTimeIndex.sort_values + CFTimeIndex.sortlevel CFTimeIndex.strftime + CFTimeIndex.summary + CFTimeIndex.symmetric_difference + CFTimeIndex.take + CFTimeIndex.to_datetimeindex + CFTimeIndex.to_flat_index + CFTimeIndex.to_frame + CFTimeIndex.to_list + CFTimeIndex.to_native_types + CFTimeIndex.to_numpy + CFTimeIndex.to_series + CFTimeIndex.tolist + CFTimeIndex.transpose + CFTimeIndex.union + CFTimeIndex.unique + CFTimeIndex.value_counts + CFTimeIndex.view + CFTimeIndex.where + + CFTimeIndex.T + CFTimeIndex.array + CFTimeIndex.asi8 + CFTimeIndex.base + CFTimeIndex.data + CFTimeIndex.date_type + CFTimeIndex.day + CFTimeIndex.dayofweek + CFTimeIndex.dayofyear + CFTimeIndex.dtype + CFTimeIndex.dtype_str + CFTimeIndex.empty + CFTimeIndex.flags + CFTimeIndex.has_duplicates + CFTimeIndex.hasnans + CFTimeIndex.hour + CFTimeIndex.inferred_type + CFTimeIndex.is_all_dates + CFTimeIndex.is_monotonic + 
CFTimeIndex.is_monotonic_increasing + CFTimeIndex.is_monotonic_decreasing + CFTimeIndex.is_unique + CFTimeIndex.itemsize + CFTimeIndex.microsecond + CFTimeIndex.minute + CFTimeIndex.month + CFTimeIndex.name + CFTimeIndex.names + CFTimeIndex.nbytes + CFTimeIndex.ndim + CFTimeIndex.nlevels + CFTimeIndex.second + CFTimeIndex.shape + CFTimeIndex.size + CFTimeIndex.strides + CFTimeIndex.values + CFTimeIndex.year + + backends.NetCDF4DataStore.close + backends.NetCDF4DataStore.encode + backends.NetCDF4DataStore.encode_attribute + backends.NetCDF4DataStore.encode_variable + backends.NetCDF4DataStore.get + backends.NetCDF4DataStore.get_attrs + backends.NetCDF4DataStore.get_dimensions + backends.NetCDF4DataStore.get_encoding + backends.NetCDF4DataStore.get_variables + backends.NetCDF4DataStore.items + backends.NetCDF4DataStore.keys + backends.NetCDF4DataStore.load + backends.NetCDF4DataStore.open + backends.NetCDF4DataStore.open_store_variable + backends.NetCDF4DataStore.prepare_variable + backends.NetCDF4DataStore.set_attribute + backends.NetCDF4DataStore.set_attributes + backends.NetCDF4DataStore.set_dimension + backends.NetCDF4DataStore.set_dimensions + backends.NetCDF4DataStore.set_variable + backends.NetCDF4DataStore.set_variables + backends.NetCDF4DataStore.store + backends.NetCDF4DataStore.store_dataset + backends.NetCDF4DataStore.sync + backends.NetCDF4DataStore.values + backends.NetCDF4DataStore.attrs + backends.NetCDF4DataStore.autoclose + backends.NetCDF4DataStore.dimensions + backends.NetCDF4DataStore.ds + backends.NetCDF4DataStore.format + backends.NetCDF4DataStore.is_remote + backends.NetCDF4DataStore.lock + backends.NetCDF4DataStore.variables + + backends.H5NetCDFStore.close + backends.H5NetCDFStore.encode + backends.H5NetCDFStore.encode_attribute + backends.H5NetCDFStore.encode_variable + backends.H5NetCDFStore.get + backends.H5NetCDFStore.get_attrs + backends.H5NetCDFStore.get_dimensions + backends.H5NetCDFStore.get_encoding + backends.H5NetCDFStore.get_variables + backends.H5NetCDFStore.items + backends.H5NetCDFStore.keys + backends.H5NetCDFStore.load + backends.H5NetCDFStore.open_store_variable + backends.H5NetCDFStore.prepare_variable + backends.H5NetCDFStore.set_attribute + backends.H5NetCDFStore.set_attributes + backends.H5NetCDFStore.set_dimension + backends.H5NetCDFStore.set_dimensions + backends.H5NetCDFStore.set_variable + backends.H5NetCDFStore.set_variables + backends.H5NetCDFStore.store + backends.H5NetCDFStore.store_dataset + backends.H5NetCDFStore.sync + backends.H5NetCDFStore.values + backends.H5NetCDFStore.attrs + backends.H5NetCDFStore.dimensions + backends.H5NetCDFStore.ds + backends.H5NetCDFStore.variables + + backends.PydapDataStore.close + backends.PydapDataStore.get + backends.PydapDataStore.get_attrs + backends.PydapDataStore.get_dimensions + backends.PydapDataStore.get_encoding + backends.PydapDataStore.get_variables + backends.PydapDataStore.items + backends.PydapDataStore.keys + backends.PydapDataStore.load + backends.PydapDataStore.open + backends.PydapDataStore.open_store_variable + backends.PydapDataStore.values + backends.PydapDataStore.attrs + backends.PydapDataStore.dimensions + backends.PydapDataStore.variables + + backends.ScipyDataStore.close + backends.ScipyDataStore.encode + backends.ScipyDataStore.encode_attribute + backends.ScipyDataStore.encode_variable + backends.ScipyDataStore.get + backends.ScipyDataStore.get_attrs + backends.ScipyDataStore.get_dimensions + backends.ScipyDataStore.get_encoding + backends.ScipyDataStore.get_variables + 
backends.ScipyDataStore.items + backends.ScipyDataStore.keys + backends.ScipyDataStore.load + backends.ScipyDataStore.open_store_variable + backends.ScipyDataStore.prepare_variable + backends.ScipyDataStore.set_attribute + backends.ScipyDataStore.set_attributes + backends.ScipyDataStore.set_dimension + backends.ScipyDataStore.set_dimensions + backends.ScipyDataStore.set_variable + backends.ScipyDataStore.set_variables + backends.ScipyDataStore.store + backends.ScipyDataStore.store_dataset + backends.ScipyDataStore.sync + backends.ScipyDataStore.values + backends.ScipyDataStore.attrs + backends.ScipyDataStore.dimensions + backends.ScipyDataStore.ds + backends.ScipyDataStore.variables + + backends.FileManager.acquire + backends.FileManager.acquire_context + backends.FileManager.close + + backends.CachingFileManager.acquire + backends.CachingFileManager.acquire_context + backends.CachingFileManager.close + + backends.DummyFileManager.acquire + backends.DummyFileManager.acquire_context + backends.DummyFileManager.close diff --git a/doc/api.rst b/doc/api.rst index 8b523b7837c..d3491e020fd 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -31,6 +31,8 @@ Top-level functions ones_like dot map_blocks + show_versions + set_options Dataset ======= @@ -74,7 +76,9 @@ and values given by ``DataArray`` objects. Dataset.__setitem__ Dataset.__delitem__ Dataset.update + Dataset.get Dataset.items + Dataset.keys Dataset.values Dataset contents @@ -537,6 +541,15 @@ DataArray methods DataArray.unify_chunks DataArray.map_blocks +Coordinates objects +=================== + +.. autosummary:: + :toctree: generated/ + + core.coordinates.DataArrayCoordinates + core.coordinates.DatasetCoordinates + GroupBy objects =============== @@ -564,6 +577,16 @@ Rolling objects core.rolling.DatasetRolling.reduce core.rolling_exp.RollingExp +Coarsen objects +=============== + +.. autosummary:: + :toctree: generated/ + + core.rolling.DataArrayCoarsen + core.rolling.DatasetCoarsen + + Resample objects ================ @@ -625,6 +648,7 @@ Plotting plot.imshow plot.line plot.pcolormesh + plot.FacetGrid Faceting -------- @@ -644,6 +668,14 @@ Faceting plot.FacetGrid.set_xlabels plot.FacetGrid.set_ylabels +Tutorial +======== + +.. autosummary:: + :toctree: generated/ + + tutorial.open_dataset + tutorial.load_dataset Testing ======= @@ -681,7 +713,7 @@ Advanced API These backends provide a low-level interface for lazily loading data from external file-formats or protocols, and can be manually invoked to create -arguments for the ``from_store`` and ``dump_to_store`` Dataset methods: +arguments for the ``load_store`` and ``dump_to_store`` Dataset methods: .. autosummary:: :toctree: generated/ @@ -697,6 +729,9 @@ arguments for the ``from_store`` and ``dump_to_store`` Dataset methods: Deprecated / Pending Deprecation ================================ +.. autosummary:: + :toctree: generated/ + Dataset.drop DataArray.drop Dataset.apply diff --git a/doc/groupby.rst b/doc/groupby.rst index f5943703765..927e192eb6c 100644 --- a/doc/groupby.rst +++ b/doc/groupby.rst @@ -94,7 +94,7 @@ Apply ~~~~~ To apply a function to each group, you can use the flexible -:py:meth:`~xarray.DatasetGroupBy.map` method. The resulting objects are automatically +:py:meth:`~xarray.core.groupby.DatasetGroupBy.map` method. The resulting objects are automatically concatenated back together along the group axis: .. 
ipython:: python @@ -104,8 +104,8 @@ concatenated back together along the group axis: arr.groupby('letters').map(standardize) -GroupBy objects also have a :py:meth:`~xarray.DatasetGroupBy.reduce` method and -methods like :py:meth:`~xarray.DatasetGroupBy.mean` as shortcuts for applying an +GroupBy objects also have a :py:meth:`~xarray.core.groupby.DatasetGroupBy.reduce` method and +methods like :py:meth:`~xarray.core.groupby.DatasetGroupBy.mean` as shortcuts for applying an aggregation function: .. ipython:: python diff --git a/doc/howdoi.rst b/doc/howdoi.rst index 91644ba2718..80266bd3b84 100644 --- a/doc/howdoi.rst +++ b/doc/howdoi.rst @@ -22,7 +22,7 @@ How do I ... * - change the order of dimensions - :py:meth:`DataArray.transpose`, :py:meth:`Dataset.transpose` * - remove a variable from my object - - :py:meth:`Dataset.drop`, :py:meth:`DataArray.drop` + - :py:meth:`Dataset.drop_vars`, :py:meth:`DataArray.drop_vars` * - remove dimensions of length 1 or 0 - :py:meth:`DataArray.squeeze`, :py:meth:`Dataset.squeeze` * - remove all variables with a particular dimension @@ -48,7 +48,7 @@ How do I ... * - write xarray objects with complex values to a netCDF file - :py:func:`Dataset.to_netcdf`, :py:func:`DataArray.to_netcdf` specifying ``engine="h5netcdf", invalid_netcdf=True`` * - make xarray objects look like other xarray objects - - :py:func:`~xarray.ones_like`, :py:func:`~xarray.zeros_like`, :py:func:`~xarray.full_like`, :py:meth:`Dataset.reindex_like`, :py:meth:`Dataset.interpolate_like`, :py:meth:`Dataset.broadcast_like`, :py:meth:`DataArray.reindex_like`, :py:meth:`DataArray.interpolate_like`, :py:meth:`DataArray.broadcast_like` + - :py:func:`~xarray.ones_like`, :py:func:`~xarray.zeros_like`, :py:func:`~xarray.full_like`, :py:meth:`Dataset.reindex_like`, :py:meth:`Dataset.interp_like`, :py:meth:`Dataset.broadcast_like`, :py:meth:`DataArray.reindex_like`, :py:meth:`DataArray.interp_like`, :py:meth:`DataArray.broadcast_like` * - replace NaNs with other values - :py:meth:`Dataset.fillna`, :py:meth:`Dataset.ffill`, :py:meth:`Dataset.bfill`, :py:meth:`Dataset.interpolate_na`, :py:meth:`DataArray.fillna`, :py:meth:`DataArray.ffill`, :py:meth:`DataArray.bfill`, :py:meth:`DataArray.interpolate_na` * - extract the year, month, day or similar from a DataArray of time values diff --git a/doc/indexing.rst b/doc/indexing.rst index e8482ac66b3..cfbb84a8343 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -132,7 +132,7 @@ use them explicitly to slice data. There are two ways to do this: The arguments to these methods can be any objects that could index the array along the dimension given by the keyword, e.g., labels for an individual value, -Python :py:func:`slice` objects or 1-dimensional arrays. +Python :py:class:`slice` objects or 1-dimensional arrays. .. note:: diff --git a/doc/interpolation.rst b/doc/interpolation.rst index 7c750506cf3..63e9a7cd35e 100644 --- a/doc/interpolation.rst +++ b/doc/interpolation.rst @@ -48,7 +48,7 @@ array-like, which gives the interpolated result as an array. # interpolation da.interp(time=[2.5, 3.5]) -To interpolate data with a :py:func:`numpy.datetime64` coordinate you can pass a string. +To interpolate data with a :py:doc:`numpy.datetime64 ` coordinate you can pass a string. .. ipython:: python @@ -128,7 +128,7 @@ It is now possible to safely compute the difference ``other - interpolated``. 
Interpolation methods
---------------------
 
-We use :py:func:`scipy.interpolate.interp1d` for 1-dimensional interpolation and
+We use :py:class:`scipy.interpolate.interp1d` for 1-dimensional interpolation and
 :py:func:`scipy.interpolate.interpn` for multi-dimensional interpolation.
 
 The interpolation method can be specified by the optional ``method`` argument.
diff --git a/doc/io.rst b/doc/io.rst
index 2e50e5639da..e910943236f 100644
--- a/doc/io.rst
+++ b/doc/io.rst
@@ -1,3 +1,4 @@
+.. currentmodule:: xarray
 .. _io:
 
 Reading and writing files
@@ -23,8 +24,8 @@ netCDF
 The recommended way to store xarray data structures is `netCDF`__, which
 is a binary file format for self-described datasets that originated
 in the geosciences. xarray is based on the netCDF data model, so netCDF files
-on disk directly correspond to :py:class:`~xarray.Dataset` objects (more accurately,
-a group in a netCDF file directly corresponds to a to :py:class:`~xarray.Dataset` object.
+on disk directly correspond to :py:class:`Dataset` objects (more accurately,
+a group in a netCDF file directly corresponds to a :py:class:`Dataset` object.
 See :ref:`io.netcdf_groups` for more.)
 
 NetCDF is supported on almost all platforms, and parsers exist
@@ -47,7 +48,7 @@ read/write netCDF V4 files and use the compression options described below).
 __ https://github.com/Unidata/netcdf4-python
 
 We can save a Dataset to disk using the
-:py:meth:`~Dataset.to_netcdf` method:
+:py:meth:`Dataset.to_netcdf` method:
 
 .. ipython:: python
@@ -65,13 +66,13 @@ the ``format`` and ``engine`` arguments.
 .. tip::
 
     Using the `h5netcdf `_ package
-    by passing ``engine='h5netcdf'`` to :py:meth:`~xarray.open_dataset` can
+    by passing ``engine='h5netcdf'`` to :py:meth:`open_dataset` can
     sometimes be quicker than the default ``engine='netcdf4'`` that uses the
     `netCDF4 `_ package.
 
 We can load netCDF files to create a new Dataset using
-:py:func:`~xarray.open_dataset`:
+:py:func:`open_dataset`:
 
 .. ipython:: python
 
     ds_disk = xr.open_dataset("saved_on_disk.nc")
     ds_disk
 
 Similarly, a DataArray can be saved to disk using the
-:py:attr:`DataArray.to_netcdf ` method, and loaded
-from disk using the :py:func:`~xarray.open_dataarray` function. As netCDF files
-correspond to :py:class:`~xarray.Dataset` objects, these functions internally
+:py:meth:`DataArray.to_netcdf` method, and loaded
+from disk using the :py:func:`open_dataarray` function. As netCDF files
+correspond to :py:class:`Dataset` objects, these functions internally
 convert the ``DataArray`` to a ``Dataset`` before saving, and then convert back
 when loading, ensuring that the ``DataArray`` that is loaded is always exactly
 the same as the one that was saved.
@@ -108,9 +109,9 @@ is modified: the original file on disk is never touched.
    xarray's lazy loading of remote or on-disk datasets is often but not always
    desirable. Before performing computationally intense operations, it is
    often a good idea to load a Dataset (or DataArray) entirely into memory by
-   invoking the :py:meth:`~xarray.Dataset.load` method.
+   invoking the :py:meth:`Dataset.load` method.
 
-Datasets have a :py:meth:`~xarray.Dataset.close` method to close the associated
+Datasets have a :py:meth:`Dataset.close` method to close the associated
 netCDF file. However, it's often cleaner to use a ``with`` statement:
 
 .. ipython:: python
@@ -135,17 +136,17 @@ to the original netCDF file, regardless if they exist in the original dataset.
Groups ~~~~~~ -NetCDF groups are not supported as part of the :py:class:`~xarray.Dataset` data model. +NetCDF groups are not supported as part of the :py:class:`Dataset` data model. Instead, groups can be loaded individually as Dataset objects. To do so, pass a ``group`` keyword argument to the -:py:func:`~xarray.open_dataset` function. The group can be specified as a path-like +:py:func:`open_dataset` function. The group can be specified as a path-like string, e.g., to access subgroup ``'bar'`` within group ``'foo'`` pass ``'/foo/bar'`` as the ``group`` argument. In a similar way, the ``group`` keyword argument can be given to the -:py:meth:`~xarray.Dataset.to_netcdf` method to write to a group +:py:meth:`Dataset.to_netcdf` method to write to a group in a netCDF file. When writing multiple groups in one file, pass ``mode='a'`` to -:py:meth:`~xarray.Dataset.to_netcdf` to ensure that each call does not delete the file. +:py:meth:`Dataset.to_netcdf` to ensure that each call does not delete the file. .. _io.encoding: @@ -155,7 +156,7 @@ Reading encoded data NetCDF files follow some conventions for encoding datetime arrays (as numbers with a "units" attribute) and for packing and unpacking data (as described by the "scale_factor" and "add_offset" attributes). If the argument -``decode_cf=True`` (default) is given to :py:func:`~xarray.open_dataset`, xarray will attempt +``decode_cf=True`` (default) is given to :py:func:`open_dataset`, xarray will attempt to automatically decode the values in the netCDF objects according to `CF conventions`_. Sometimes this will fail, for example, if a variable has an invalid "units" or "calendar" attribute. For these cases, you can @@ -164,8 +165,8 @@ turn this decoding off manually. .. _CF conventions: http://cfconventions.org/ You can view this encoding information (among others) in the -:py:attr:`DataArray.encoding ` and -:py:attr:`DataArray.encoding ` attributes: +:py:attr:`DataArray.encoding` and +:py:attr:`DataArray.encoding` attributes: .. ipython:: :verbatim: @@ -206,13 +207,13 @@ Reading multi-file datasets NetCDF files are often encountered in collections, e.g., with different files corresponding to different model runs or one file per timestamp. xarray can straightforwardly combine such files into a single Dataset by making use of -:py:func:`~xarray.concat`, :py:func:`~xarray.merge`, :py:func:`~xarray.combine_nested` and -:py:func:`~xarray.combine_by_coords`. For details on the difference between these +:py:func:`concat`, :py:func:`merge`, :py:func:`combine_nested` and +:py:func:`combine_by_coords`. For details on the difference between these functions see :ref:`combining data`. Xarray includes support for manipulating datasets that don't fit into memory with dask_. If you have dask installed, you can open multiple files -simultaneously in parallel using :py:func:`~xarray.open_mfdataset`:: +simultaneously in parallel using :py:func:`open_mfdataset`:: xr.open_mfdataset('my/files/*.nc', parallel=True) @@ -221,7 +222,7 @@ single xarray dataset. It is the recommended way to open multiple files with xarray. For more details on parallel reading, see :ref:`combining.multi`, :ref:`dask.io` and a `blog post`_ by Stephan Hoyer. -:py:func:`~xarray.open_mfdataset` takes many kwargs that allow you to +:py:func:`open_mfdataset` takes many kwargs that allow you to control its behaviour (for e.g. ``parallel``, ``combine``, ``compat``, ``join``, ``concat_dim``). See its docstring for more details. @@ -246,14 +247,14 @@ See its docstring for more details. .. 
_dask: http://dask.pydata.org .. _blog post: http://stephanhoyer.com/2015/06/11/xray-dask-out-of-core-labeled-arrays/ -Sometimes multi-file datasets are not conveniently organized for easy use of :py:func:`~xarray.open_mfdataset`. +Sometimes multi-file datasets are not conveniently organized for easy use of :py:func:`open_mfdataset`. One can use the ``preprocess`` argument to provide a function that takes a dataset and returns a modified Dataset. -:py:func:`~xarray.open_mfdataset` will call ``preprocess`` on every dataset +:py:func:`open_mfdataset` will call ``preprocess`` on every dataset (corresponding to each file) prior to combining them. -If :py:func:`~xarray.open_mfdataset` does not meet your needs, other approaches are possible. +If :py:func:`open_mfdataset` does not meet your needs, other approaches are possible. The general pattern for parallel reading of multiple files using dask, modifying those datasets and then combining into a single ``Dataset`` is:: @@ -459,9 +460,9 @@ Invalid netCDF files The library ``h5netcdf`` allows writing some dtypes (booleans, complex, ...) that aren't allowed in netCDF4 (see -`h5netcdf documentation `_. -This feature is availabe through :py:func:`DataArray.to_netcdf` and -:py:func:`Dataset.to_netcdf` when used with ``engine="h5netcdf"`` +`h5netcdf documentation `_). +This feature is availabe through :py:meth:`DataArray.to_netcdf` and +:py:meth:`Dataset.to_netcdf` when used with ``engine="h5netcdf"`` and currently raises a warning unless ``invalid_netcdf=True`` is set: .. ipython:: python @@ -494,7 +495,7 @@ The Iris_ tool allows easy reading of common meteorological and climate model fo (including GRIB and UK MetOffice PP files) into ``Cube`` objects which are in many ways very similar to ``DataArray`` objects, while enforcing a CF-compliant data model. If iris is installed xarray can convert a ``DataArray`` into a ``Cube`` using -:py:meth:`~xarray.DataArray.to_iris`: +:py:meth:`DataArray.to_iris`: .. ipython:: python @@ -506,7 +507,7 @@ installed xarray can convert a ``DataArray`` into a ``Cube`` using cube Conversely, we can create a new ``DataArray`` object from a ``Cube`` using -:py:meth:`~xarray.DataArray.from_iris`: +:py:meth:`DataArray.from_iris`: .. ipython:: python @@ -608,7 +609,7 @@ over the network until we look at particular values: .. image:: _static/opendap-prism-tmax.png Some servers require authentication before we can access the data. For this -purpose we can explicitly create a :py:class:`~xarray.backends.PydapDataStore` +purpose we can explicitly create a :py:class:`backends.PydapDataStore` and pass in a `Requests`__ session object. For example for HTTP Basic authentication:: @@ -671,8 +672,8 @@ this version of xarray will work in future versions. When pickling an object opened from a NetCDF file, the pickle file will contain a reference to the file on disk. If you want to store the actual - array values, load it into memory first with :py:meth:`~xarray.Dataset.load` - or :py:meth:`~xarray.Dataset.compute`. + array values, load it into memory first with :py:meth:`Dataset.load` + or :py:meth:`Dataset.compute`. .. _dictionary io: @@ -680,7 +681,7 @@ Dictionary ---------- We can convert a ``Dataset`` (or a ``DataArray``) to a dict using -:py:meth:`~xarray.Dataset.to_dict`: +:py:meth:`Dataset.to_dict`: .. ipython:: python @@ -688,7 +689,7 @@ We can convert a ``Dataset`` (or a ``DataArray``) to a dict using d We can create a new xarray object from a dict using -:py:meth:`~xarray.Dataset.from_dict`: +:py:meth:`Dataset.from_dict`: .. 
ipython:: python @@ -723,7 +724,7 @@ Rasterio GeoTIFFs and other gridded raster datasets can be opened using `rasterio`_, if rasterio is installed. Here is an example of how to use -:py:func:`~xarray.open_rasterio` to read one of rasterio's `test files`_: +:py:func:`open_rasterio` to read one of rasterio's `test files`_: .. ipython:: :verbatim: @@ -782,8 +783,7 @@ Xarray's Zarr backend allows xarray to leverage these capabilities. Xarray can't open just any zarr dataset, because xarray requires special metadata (attributes) describing the dataset dimensions and coordinates. At this time, xarray can only open zarr datasets that have been written by -xarray. To write a dataset with zarr, we use the -:py:attr:`Dataset.to_zarr ` method. +xarray. To write a dataset with zarr, we use the :py:attr:`Dataset.to_zarr` method. To write to a local directory, we pass a path to a directory .. ipython:: python @@ -830,7 +830,7 @@ can be omitted as it will internally be set to ``'a'``. To store variable length strings use ``dtype=object``. To read back a zarr dataset that has been created this way, we use the -:py:func:`~xarray.open_zarr` method: +:py:func:`open_zarr` method: .. ipython:: python @@ -899,12 +899,12 @@ opening the store. (For more information on this feature, consult the If you have zarr version 2.3 or greater, xarray can write and read stores with consolidated metadata. To write consolidated metadata, pass the ``consolidated=True`` option to the -:py:attr:`Dataset.to_zarr ` method:: +:py:attr:`Dataset.to_zarr` method:: ds.to_zarr('foo.zarr', consolidated=True) To read a consolidated store, pass the ``consolidated=True`` option to -:py:func:`~xarray.open_zarr`:: +:py:func:`open_zarr`:: ds = xr.open_zarr('foo.zarr', consolidated=True) @@ -926,7 +926,7 @@ GRIB format via cfgrib xarray supports reading GRIB files via ECMWF cfgrib_ python driver and ecCodes_ C-library, if they are installed. To open a GRIB file supply ``engine='cfgrib'`` -to :py:func:`~xarray.open_dataset`: +to :py:func:`open_dataset`: .. ipython:: :verbatim: @@ -948,7 +948,7 @@ Formats supported by PyNIO xarray can also read GRIB, HDF4 and other file formats supported by PyNIO_, if PyNIO is installed. To use PyNIO to read such files, supply -``engine='pynio'`` to :py:func:`~xarray.open_dataset`. +``engine='pynio'`` to :py:func:`open_dataset`. We recommend installing PyNIO via conda:: @@ -970,7 +970,7 @@ identify readers heuristically, or format can be specified via a key in `backend_kwargs`. To use PseudoNetCDF to read such files, supply -``engine='pseudonetcdf'`` to :py:func:`~xarray.open_dataset`. +``engine='pseudonetcdf'`` to :py:func:`open_dataset`. Add ``backend_kwargs={'format': ''}`` where `` options are listed on the PseudoNetCDF page. diff --git a/doc/pandas.rst b/doc/pandas.rst index 72abf6609f6..a84c89ab938 100644 --- a/doc/pandas.rst +++ b/doc/pandas.rst @@ -1,3 +1,4 @@ +.. currentmodule:: xarray .. _pandas: =================== @@ -32,9 +33,9 @@ Tabular data is easiest to work with when it meets the criteria for __ http://www.jstatsoft.org/v59/i10/ -In this "tidy data" format, we can represent any :py:class:`~xarray.Dataset` and -:py:class:`~xarray.DataArray` in terms of :py:class:`pandas.DataFrame` and -:py:class:`pandas.Series`, respectively (and vice-versa). The representation +In this "tidy data" format, we can represent any :py:class:`Dataset` and +:py:class:`DataArray` in terms of :py:class:`~pandas.DataFrame` and +:py:class:`~pandas.Series`, respectively (and vice-versa). 
 The representation works by flattening non-coordinates to 1D, and turning the
 tensor product of coordinate indexes into a :py:class:`pandas.MultiIndex`.
@@ -42,7 +43,7 @@ Dataset and DataFrame
 ---------------------
 
 To convert any dataset to a ``DataFrame`` in tidy form, use the
-:py:meth:`Dataset.to_dataframe() ` method:
+:py:meth:`Dataset.to_dataframe()` method:
 
 .. ipython:: python
 
@@ -61,11 +62,11 @@ use ``DataFrame`` methods like :py:meth:`~pandas.DataFrame.reset_index`,
 :py:meth:`~pandas.DataFrame.stack` and :py:meth:`~pandas.DataFrame.unstack`.
 
 For datasets containing dask arrays where the data should be lazily loaded, see the
-:py:meth:`Dataset.to_dask_dataframe() ` method.
+:py:meth:`Dataset.to_dask_dataframe()` method.
 
 To create a ``Dataset`` from a ``DataFrame``, use the
-:py:meth:`~xarray.Dataset.from_dataframe` class method or the equivalent
-:py:meth:`pandas.DataFrame.to_xarray ` method:
+:py:meth:`Dataset.from_dataframe` class method or the equivalent
+:py:meth:`pandas.DataFrame.to_xarray` method:
 
 .. ipython:: python
 
@@ -83,7 +84,7 @@ DataArray and Series
 --------------------
 
 ``DataArray`` objects have a complementary representation in terms of a
-:py:class:`pandas.Series`. Using a Series preserves the ``Dataset`` to
+:py:class:`~pandas.Series`. Using a Series preserves the ``Dataset`` to
 ``DataArray`` relationship, because ``DataFrames`` are dict-like containers
 of ``Series``. The methods are very similar to those for working with
 DataFrames:
@@ -109,7 +110,7 @@ Multi-dimensional data
 
 Tidy data is great, but sometimes you want to preserve dimensions instead of
 automatically stacking them into a ``MultiIndex``.
 
-:py:meth:`DataArray.to_pandas() ` is a shortcut that
+:py:meth:`DataArray.to_pandas()` is a shortcut that
 lets you convert a DataArray directly into a pandas object with the same
 dimensionality (i.e., a 1D array is converted to a :py:class:`~pandas.Series`,
 2D to :py:class:`~pandas.DataFrame` and 3D to ``pandas.Panel``):
@@ -122,7 +123,7 @@ dimensionality (i.e., a 1D array is converted to a :py:class:`~pandas.Series`,
     df
 
 To perform the inverse operation of converting any pandas objects into a data
-array with the same shape, simply use the :py:class:`~xarray.DataArray`
+array with the same shape, simply use the :py:class:`DataArray`
 constructor:
 
 .. ipython:: python
 
@@ -143,7 +144,7 @@ preserve all use of multi-indexes:
 
 However, you will need to set dimension names explicitly, either with the
 ``dims`` argument in the ``DataArray`` constructor or by calling
-:py:class:`~xarray.Dataset.rename` on the new object.
+:py:meth:`~Dataset.rename` on the new object.
 
 .. _panel transition:
 
diff --git a/doc/plotting.rst b/doc/plotting.rst
index d77a170ce85..ea9816780a7 100644
--- a/doc/plotting.rst
+++ b/doc/plotting.rst
@@ -1,3 +1,4 @@
+.. currentmodule:: xarray
 .. _plotting:
 
 Plotting
@@ -10,8 +11,8 @@ Labeled data enables expressive computations. These same
 labels can also be used to easily create informative plots.
 
 xarray's plotting capabilities are centered around
-:py:class:`xarray.DataArray` objects.
-To plot :py:class:`xarray.Dataset` objects
+:py:class:`DataArray` objects.
+To plot :py:class:`Dataset` objects
 simply access the relevant DataArrays, i.e. ``dset['var1']``.
 Dataset specific plotting routines are also available (see :ref:`plot-dataset`).
 Here we focus mostly on arrays 2d or larger. If your data fits
@@ -94,7 +95,7 @@ One Dimension
 Simple Example
 ================
 
-The simplest way to make a plot is to call the :py:func:`xarray.DataArray.plot()` method.
+The simplest way to make a plot is to call the :py:func:`DataArray.plot()` method.
 
 .. ipython:: python
 
@@ -227,7 +228,7 @@ It is required to explicitly specify either
 
 Thus, we could have made the previous plot by specifying ``hue='lat'`` instead
 of ``x='time'``. If required, the automatic legend can be turned off using
 ``add_legend=False``. Alternatively,
-``hue`` can be passed directly to :py:func:`xarray.plot` as `air.isel(lon=10, lat=[19,21,22]).plot(hue='lat')`.
+``hue`` can be passed directly to :py:func:`xarray.plot.line` as ``air.isel(lon=10, lat=[19,21,22]).plot.line(hue='lat')``.
 
 
 ========================
 
@@ -256,7 +257,7 @@ made using 1D data.
 
 The argument ``where`` defines where the steps should be placed; options are
 ``'pre'`` (default), ``'post'``, and ``'mid'``. This is particularly handy
-when plotting data grouped with :py:func:`xarray.Dataset.groupby_bins`.
+when plotting data grouped with :py:meth:`Dataset.groupby_bins`.
 
 .. ipython:: python
 
@@ -295,7 +296,7 @@ Two Dimensions
 
 Simple Example
 ================
 
-The default method :py:meth:`xarray.DataArray.plot` calls :py:func:`xarray.plot.pcolormesh` by default when the data is two-dimensional.
+The default method :py:meth:`DataArray.plot` calls :py:func:`xarray.plot.pcolormesh` when the data is two-dimensional.
 
 .. ipython:: python
 
@@ -573,7 +574,7 @@ Faceted plotting supports other arguments common to xarray 2d plots.
 
 FacetGrid Objects
 ===================
 
-The object returned, ``g`` in the above examples, is a :py:class:`~xarray.plot.FacetGrid`` object
+The object returned, ``g`` in the above examples, is a :py:class:`~xarray.plot.FacetGrid` object
 that links a :py:class:`DataArray` to a matplotlib figure with a particular structure.
 This object can be used to control the behavior of the multiple plots.
 It borrows an API and code from `Seaborn's FacetGrid
@@ -612,11 +613,11 @@ they have been plotted.
 
     plt.draw()
 
-:py:class:`~xarray.FacetGrid` objects have methods that let you customize the automatically generated
+:py:class:`~xarray.plot.FacetGrid` objects have methods that let you customize the automatically generated
 axis labels, axis ticks and plot titles. See :py:meth:`~xarray.plot.FacetGrid.set_titles`,
 :py:meth:`~xarray.plot.FacetGrid.set_xlabels`, :py:meth:`~xarray.plot.FacetGrid.set_ylabels` and
 :py:meth:`~xarray.plot.FacetGrid.set_ticks` for more information.
 
-Plotting functions can be applied to each subset of the data by calling :py:meth:`~xarray.plot.FacetGrid.map_dataarray` or to each subplot by calling :py:meth:`FacetGrid.map`.
+Plotting functions can be applied to each subset of the data by calling :py:meth:`~xarray.plot.FacetGrid.map_dataarray` or to each subplot by calling :py:meth:`~xarray.plot.FacetGrid.map`.
 
 TODO: add an example of using the ``map`` method to plot dataset variables
 (e.g., with ``plt.quiver``).
 
diff --git a/doc/terminology.rst b/doc/terminology.rst
index d1265e4da9d..ab6d856920a 100644
--- a/doc/terminology.rst
+++ b/doc/terminology.rst
@@ -1,3 +1,4 @@
+.. currentmodule:: xarray
 .. _terminology:
 
 Terminology
 
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 44bff9e7202..d03cfb948fa 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -28,8 +28,8 @@ New Features
 - :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile` and ``GroupBy.quantile``
   now work with dask Variables.
   By `Deepak Cherian `_.
-- Added the :py:meth:`count` reduction method to both :py:class:`DatasetCoarsen`
-  and :py:class:`DataArrayCoarsen` objects. (:pull:`3500`)
+- Added the ``count`` reduction method to both :py:class:`~core.rolling.DatasetCoarsen`
+  and :py:class:`~core.rolling.DataArrayCoarsen` objects. (:pull:`3500`)
   By `Deepak Cherian `_
 
 Bug fixes
@@ -74,7 +74,7 @@ Internal Changes
   :py:meth:`DataArray.isel`, and :py:meth:`DataArray.__getitem__` when indexing by int,
   slice, list of int, scalar ndarray, or 1-dimensional ndarray.
   (:pull:`3533`) by `Guido Imperiale `_.
-- Removed internal method ``Dataset._from_vars_and_coord_names``, 
+- Removed internal method ``Dataset._from_vars_and_coord_names``,
   which was dominated by ``Dataset._construct_direct``. (:pull:`3565`)
   By `Maximilian Roos `_
 
@@ -101,8 +101,8 @@ Breaking changes
 
 New Features
 ~~~~~~~~~~~~
-- Added the ``sparse`` option to :py:meth:`~xarray.DataArray.unstack`, 
-  :py:meth:`~xarray.Dataset.unstack`, :py:meth:`~xarray.DataArray.reindex`, 
+- Added the ``sparse`` option to :py:meth:`~xarray.DataArray.unstack`,
+  :py:meth:`~xarray.Dataset.unstack`, :py:meth:`~xarray.DataArray.reindex`,
   :py:meth:`~xarray.Dataset.reindex` (:issue:`3518`).
   By `Keisuke Fujii `_.
 - Added the ``fill_value`` option to :py:meth:`DataArray.unstack` and
@@ -112,13 +112,13 @@ New Features
   :py:meth:`~xarray.Dataset.interpolate_na`. This controls the maximum size of the data
   gap that will be filled by interpolation. By `Deepak Cherian `_.
 - Added :py:meth:`Dataset.drop_sel` & :py:meth:`DataArray.drop_sel` for dropping labels.
-  :py:meth:`Dataset.drop_vars` & :py:meth:`DataArray.drop_vars` have been added for 
+  :py:meth:`Dataset.drop_vars` & :py:meth:`DataArray.drop_vars` have been added for
   dropping variables (including coordinates). The existing :py:meth:`Dataset.drop` &
   :py:meth:`DataArray.drop` methods remain as a backward compatible option for
   dropping either labels or variables, but using the more specific methods is encouraged.
   (:pull:`3475`)
   By `Maximilian Roos `_
-- Added :py:meth:`Dataset.map` & :py:meth:`GroupBy.map` & :py:meth:`Resample.map` for 
+- Added :py:meth:`Dataset.map` & ``GroupBy.map`` & ``Resample.map`` for
   mapping / applying a function over each item in the collection, reflecting the widely used
   and least surprising name for this operation.
   The existing ``apply`` methods remain for backward compatibility, though using the ``map``
@@ -137,7 +137,7 @@ New Features
 - :py:func:`xarray.dot` and :py:meth:`DataArray.dot` now support the
   ``dims=...`` option to sum over the union of dimensions of all input arrays
   (:issue:`3423`) by `Mathias Hauser `_.
-- Added new :py:meth:`Dataset._repr_html_` and :py:meth:`DataArray._repr_html_` to improve
+- Added new ``Dataset._repr_html_`` and ``DataArray._repr_html_`` to improve
   representation of objects in Jupyter. By default this feature is turned off
   for now. Enable it with ``xarray.set_options(display_style="html")``.
   (:pull:`3425`) by `Benoit Bovy `_ and
@@ -146,25 +146,25 @@ New Features
   `_ for xarray objects. Note that xarray objects
   with a dask.array backend already used deterministic hashing
   in previous releases; this change implements it when whole
-  xarray objects are embedded in a dask graph, e.g. when :py:meth:`DataArray.map` is
+  xarray objects are embedded in a dask graph, e.g. when :py:meth:`DataArray.map_blocks` is
   invoked. (:issue:`3378`, :pull:`3446`, :pull:`3515`)
   By `Deepak Cherian `_ and
   `Guido Imperiale `_.
 - xarray now respects the ``DataArray.encoding["coordinates"]`` attribute when writing
   to disk. See :ref:`io.coordinates` for more. (:issue:`3351`, :pull:`3487`)
   By `Deepak Cherian `_.
-- Add the documented-but-missing :py:meth:`DatasetGroupBy.quantile`.
+- Add the documented-but-missing :py:meth:`~core.groupby.DatasetGroupBy.quantile`.
   (:issue:`3525`, :pull:`3527`). By `Justus Magin `_.
 
 Bug fixes
 ~~~~~~~~~
-- Ensure an index of type ``CFTimeIndex`` is not converted to a ``DatetimeIndex`` when 
+- Ensure an index of type ``CFTimeIndex`` is not converted to a ``DatetimeIndex`` when
   calling :py:meth:`Dataset.rename`, :py:meth:`Dataset.rename_dims` and
   :py:meth:`Dataset.rename_vars`. By `Mathias Hauser `_. (:issue:`3522`).
 - Fix a bug in :py:meth:`DataArray.set_index` in case that an existing dimension
   becomes a level variable of MultiIndex. (:pull:`3520`). By `Keisuke Fujii `_.
 - Harmonize ``_FillValue``, ``missing_value`` during encoding and decoding steps. (:pull:`3502`)
-  By `Anderson Banihirwe `_. 
+  By `Anderson Banihirwe `_.
 - Fix regression introduced in v0.14.0 that would cause a crash if dask is installed
   but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle `_
 - Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`).
@@ -179,7 +180,7 @@ Bug fixes
 - Rolling reduction operations no longer compute dask arrays by default. (:issue:`3161`).
   In addition, the ``allow_lazy`` kwarg to ``reduce`` is deprecated.
   By `Deepak Cherian `_.
-- Fix :py:meth:`GroupBy.reduce` when reducing over multiple dimensions.
+- Fix ``GroupBy.reduce`` when reducing over multiple dimensions.
   (:issue:`3402`). By `Deepak Cherian `_
 - Allow appending datetime and bool data variables to zarr stores.
   (:issue:`3480`). By `Akihiro Matsukawa `_.
@@ -229,7 +230,7 @@ Internal Changes
 - Enable type checking on default sentinel values (:pull:`3472`)
   By `Maximilian Roos `_
-- Add :py:meth:`Variable._replace` for simpler replacing of a subset of attributes (:pull:`3472`)
+- Add ``Variable._replace`` for simpler replacing of a subset of attributes (:pull:`3472`)
   By `Maximilian Roos `_
 
 .. _whats-new.0.14.0:
@@ -285,7 +286,7 @@ New functions/methods
 Enhancements
 ~~~~~~~~~~~~
-- :py:class:`~xarray.core.GroupBy` enhancements. By `Deepak Cherian `_.
+- ``core.groupby.GroupBy`` enhancements. By `Deepak Cherian `_.
 
 - Added a repr (:pull:`3344`). Example::
 
@@ -320,7 +321,7 @@ Bug fixes
 - Fix error in concatenating unlabeled dimensions (:pull:`3362`).
   By `Deepak Cherian `_.
 - Warn if the ``dim`` kwarg is passed to rolling operations. This is redundant since a dimension is
-  specified when the :py:class:`DatasetRolling` or :py:class:`DataArrayRolling` object is created.
+  specified when the :py:class:`~core.rolling.DatasetRolling` or :py:class:`~core.rolling.DataArrayRolling` object is created.
   (:pull:`3362`). By `Deepak Cherian `_.
 
 Documentation
@@ -393,7 +394,7 @@ Breaking changes
 - Reindexing with variables of a different dimension now raises an error (previously
   deprecated)
 - ``xarray.broadcast_array`` is removed (previously deprecated in favor of
   :py:func:`~xarray.broadcast`)
-- :py:meth:`Variable.expand_dims` is removed (previously deprecated in favor of
+- ``Variable.expand_dims`` is removed (previously deprecated in favor of
   :py:meth:`Variable.set_dims`)
 
 New functions/methods
@@ -478,8 +479,7 @@ Enhancements
 - ``xarray.Dataset.drop`` now supports keyword arguments; dropping index
   labels by using both ``dim`` and ``labels`` or using a
-  :py:class:`~xarray.core.coordinates.DataArrayCoordinates` object are
-  deprecated (:issue:`2910`).
+  :py:class:`~core.coordinates.DataArrayCoordinates` object are deprecated (:issue:`2910`).
   By `Gregory Gundersen `_.
 - Added examples of :py:meth:`Dataset.set_index` and
@@ -627,7 +627,7 @@ New functions/methods
   By `Alan Brammer `_ and
   `Ryan May `_.
 
-- :py:meth:`~xarray.core.GroupBy.quantile` is now a method of ``GroupBy``
+- ``GroupBy.quantile`` is now a method of ``GroupBy``
   objects (:issue:`3018`).
   By `David Huard `_.
 
@@ -1169,7 +1169,7 @@ Announcements of note:
   for more details.
 - We have a new :doc:`roadmap` that outlines our future development plans.
 
-- `Dataset.apply` now properly documents the way `func` is called.
+- ``Dataset.apply`` now properly documents the way `func` is called.
   By `Matti Eskelinen `_.
 
 Enhancements
@@ -1601,7 +1601,7 @@ Backwards incompatible changes
 Enhancements
 ~~~~~~~~~~~~
-- Added :py:func:`~xarray.dot`, equivalent to :py:func:`np.einsum`.
+- Added :py:func:`~xarray.dot`, equivalent to :py:func:`numpy.einsum`.
   Also, :py:func:`~xarray.DataArray.dot` now supports ``dims`` option,
   which specifies the dimensions to sum over.
   (:issue:`1951`)
@@ -1786,7 +1786,7 @@ Bug fixes
   coordinates of target, destination and keys. If there are any conflict among these
   coordinates, ``IndexError`` will be raised.
   By `Keisuke Fujii `_.
-- Properly point :py:meth:`DataArray.__dask_scheduler__` to
+- Properly point ``DataArray.__dask_scheduler__`` to
   ``dask.threaded.get``.
   By `Matthew Rocklin `_.
 - Bug fixes in :py:meth:`DataArray.plot.imshow`: all-NaN arrays and arrays
   with size one in some dimension can now be plotted, which is good for
@@ -1998,7 +1998,7 @@ Enhancements
 - Support for :py:class:`pathlib.Path` objects added to
   :py:func:`~xarray.open_dataset`, :py:func:`~xarray.open_mfdataset`,
-  :py:func:`~xarray.to_netcdf`, and :py:func:`~xarray.save_mfdataset`
+  ``xarray.to_netcdf``, and :py:func:`~xarray.save_mfdataset`
   (:issue:`799`):
 
 .. ipython::
 
@@ -2406,7 +2406,7 @@ Enhancements
   By `Stephan Hoyer `_ and
   `Phillip J. Wolfram `_.
 
-- New aggregation on rolling objects :py:meth:`DataArray.rolling(...).count()`
+- New aggregation on rolling objects :py:meth:`~core.rolling.DataArrayRolling.count`
   which provides a rolling count of valid values (:issue:`1138`).
 
 Bug fixes
 
diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py
index 8838e71e6ca..6a975b948eb 100644
--- a/xarray/core/accessor_str.py
+++ b/xarray/core/accessor_str.py
@@ -854,12 +854,10 @@ def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True):
         ----------
         pat : string or compiled regex
             String can be a character sequence or regular expression.
-
         repl : string or callable
             Replacement string or a callable. The callable is passed the regex
             match object and must return a replacement string to be used.
             See :func:`re.sub`.
-
         n : int, default -1 (all)
             Number of replacements to make from start
         case : boolean, default None
@@ -873,7 +871,7 @@ def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True):
             - If True, assumes the passed-in pattern is a regular expression.
             - If False, treats the pattern as a literal string
             - Cannot be set to False if `pat` is a compiled regex or `repl` is
-              a callable. 
+              a callable.
 
         Returns
         -------
 
diff --git a/xarray/core/resample.py b/xarray/core/resample.py
index fb388490d06..2b3b7da6217 100644
--- a/xarray/core/resample.py
+++ b/xarray/core/resample.py
@@ -184,6 +184,7 @@ def map(self, func, shortcut=False, args=(), **kwargs):
 
         Apply uses heuristics (like `pandas.GroupBy.apply`) to figure out how
         to stack together the array. The rule is:
+
         1. If the dimension along which the group coordinate is defined is
            still in the first grouped array after applying `func`, then stack
           over this dimension.
@@ -196,11 +197,13 @@ def map(self, func, shortcut=False, args=(), **kwargs):
             Callable to apply to each array.
         shortcut : bool, optional
             Whether or not to shortcut evaluation under the assumptions that:
+
             (1) The action of `func` does not depend on any of the array
                 metadata (attributes or coordinates) but only on the data and
                 dimensions.
             (2) The action of `func` creates arrays with homogeneous metadata,
                 that is, with the same dimensions and attributes.
+
             If these conditions are satisfied `shortcut` provides significant
             speedup. This should be the case for many common groupby operations
             (e.g., applying numpy ufuncs).
@@ -275,6 +278,7 @@ def map(self, func, args=(), shortcut=None, **kwargs):
 
         Apply uses heuristics (like `pandas.GroupBy.apply`) to figure out how
         to stack together the datasets. The rule is:
+
         1. If the dimension along which the group coordinate is defined is
            still in the first grouped item after applying `func`, then stack
            over this dimension.
 
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index 4474d973f59..cb00c9dcfe0 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -1696,6 +1696,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None):
             This optional parameter specifies the interpolation method to
             use when the desired quantile lies between two data points
             ``i < j``:
+
                 * linear: ``i + (j - i) * fraction``, where ``fraction`` is
                   the fractional part of the index surrounded by ``i`` and
                   ``j``.
@@ -1703,6 +1704,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None):
                 * higher: ``j``.
                 * nearest: ``i`` or ``j``, whichever is nearest.
                 * midpoint: ``(i + j) / 2``.
+
         keep_attrs : bool, optional
             If True, the variable's attributes (`attrs`) will be copied from
             the original object to the new one.  If False (default), the new
 
diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py
index 0d6d147f0bb..6a0e62cc9dc 100755
--- a/xarray/util/print_versions.py
+++ b/xarray/util/print_versions.py
@@ -78,6 +78,13 @@ def netcdf_and_hdf5_versions():
 
 
 def show_versions(file=sys.stdout):
+    """Print the versions of xarray and its dependencies.
+
+    Parameters
+    ----------
+    file : file-like, optional
+        Print to the given file-like object. Defaults to sys.stdout.
+ """ sys_info = get_sys_info() try: From 471a5d6ce3eb10a521c8eebd3b97f72461405f19 Mon Sep 17 00:00:00 2001 From: keewis Date: Wed, 18 Dec 2019 00:22:16 +0100 Subject: [PATCH 23/28] update readthedocs.yml (#3639) --- readthedocs.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/readthedocs.yml b/readthedocs.yml index c64fa1b7b02..9ed8d28eaf2 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -1,8 +1,13 @@ +version: 2 + build: image: latest + conda: - file: ci/requirements/doc.yml + environment: ci/requirements/doc.yml + python: version: 3.7 - setup_py_install: false + install: [] + formats: [] From 3cbc459caa010f9b5042d3fa312b66c9b2b6c403 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 18 Dec 2019 08:27:30 -0700 Subject: [PATCH 24/28] Extend DatetimeAccessor properties and support `.dt` accessor for Timedelta (#3612) * Support `.dt` accessor for Timedelta * Rename accessors * Use `is_np_timedelta_like` for consistency * Use `pd.timedelta_range` * Move shared method to Properties * Parametrize field access test * move `strftime()` to `DatetimeAccessor` * Update the documentation * Update `whats-new.rst` * Add PR reference * Parametrize tests * Extend DatetimeAccessor properties * Cleanup * Fix docstring --- doc/api.rst | 1 + doc/whats-new.rst | 4 + xarray/core/accessor_dt.py | 283 +++++++++++++++++++++++-------- xarray/core/common.py | 6 + xarray/core/dataarray.py | 4 +- xarray/tests/test_accessor_dt.py | 259 +++++++++++++++++++++------- 6 files changed, 418 insertions(+), 139 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index d3491e020fd..9735eb0c708 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -616,6 +616,7 @@ Accessors :toctree: generated/ core.accessor_dt.DatetimeAccessor + core.accessor_dt.TimedeltaAccessor core.accessor_str.StringAccessor Custom Indexes diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d03cfb948fa..fe05a4d2c21 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -31,6 +31,10 @@ New Features - Added the ``count`` reduction method to both :py:class:`~core.rolling.DatasetCoarsen` and :py:class:`~core.rolling.DataArrayCoarsen` objects. (:pull:`3500`) By `Deepak Cherian `_ +- Extend :py:class:`core.accessor_dt.DatetimeAccessor` properties + and support `.dt` accessor for timedelta + via :py:class:`core.accessor_dt.TimedeltaAccessor` (:pull:`3612`) + By `Anderson Banihirwe `_. Bug fixes ~~~~~~~~~ diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index aff6fbc6691..c407371f9f0 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -1,7 +1,11 @@ import numpy as np import pandas as pd -from .common import _contains_datetime_like_objects, is_np_datetime_like +from .common import ( + _contains_datetime_like_objects, + is_np_datetime_like, + is_np_timedelta_like, +) from .pycompat import dask_array_type @@ -145,37 +149,8 @@ def _strftime(values, date_format): return access_method(values, date_format) -class DatetimeAccessor: - """Access datetime fields for DataArrays with datetime-like dtypes. - - Similar to pandas, fields can be accessed through the `.dt` attribute - for applicable DataArrays: - - >>> ds = xarray.Dataset({'time': pd.date_range(start='2000/01/01', - ... freq='D', periods=100)}) - >>> ds.time.dt - - >>> ds.time.dt.dayofyear[:5] - - array([1, 2, 3, 4, 5], dtype=int32) - Coordinates: - * time (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ... - - All of the pandas fields are accessible here. 
Note that these fields are - not calendar-aware; if your datetimes are encoded with a non-Gregorian - calendar (e.g. a 360-day calendar) using cftime, then some fields like - `dayofyear` may not be accurate. - - """ - +class Properties: def __init__(self, obj): - if not _contains_datetime_like_objects(obj): - raise TypeError( - "'dt' accessor only available for " - "DataArray with datetime64 timedelta64 dtype or " - "for arrays containing cftime datetime " - "objects." - ) self._obj = obj def _tslib_field_accessor( # type: ignore @@ -194,48 +169,6 @@ def f(self, dtype=dtype): f.__doc__ = docstring return property(f) - year = _tslib_field_accessor("year", "The year of the datetime", np.int64) - month = _tslib_field_accessor( - "month", "The month as January=1, December=12", np.int64 - ) - day = _tslib_field_accessor("day", "The days of the datetime", np.int64) - hour = _tslib_field_accessor("hour", "The hours of the datetime", np.int64) - minute = _tslib_field_accessor("minute", "The minutes of the datetime", np.int64) - second = _tslib_field_accessor("second", "The seconds of the datetime", np.int64) - microsecond = _tslib_field_accessor( - "microsecond", "The microseconds of the datetime", np.int64 - ) - nanosecond = _tslib_field_accessor( - "nanosecond", "The nanoseconds of the datetime", np.int64 - ) - weekofyear = _tslib_field_accessor( - "weekofyear", "The week ordinal of the year", np.int64 - ) - week = weekofyear - dayofweek = _tslib_field_accessor( - "dayofweek", "The day of the week with Monday=0, Sunday=6", np.int64 - ) - weekday = dayofweek - - weekday_name = _tslib_field_accessor( - "weekday_name", "The name of day in a week (ex: Friday)", object - ) - - dayofyear = _tslib_field_accessor( - "dayofyear", "The ordinal day of the year", np.int64 - ) - quarter = _tslib_field_accessor("quarter", "The quarter of the date") - days_in_month = _tslib_field_accessor( - "days_in_month", "The number of days in the month", np.int64 - ) - daysinmonth = days_in_month - - season = _tslib_field_accessor("season", "Season of the year (ex: DJF)", object) - - time = _tslib_field_accessor( - "time", "Timestamps corresponding to datetimes", object - ) - def _tslib_round_accessor(self, name, freq): obj_type = type(self._obj) result = _round_field(self._obj.data, name, freq) @@ -290,6 +223,50 @@ def round(self, freq): """ return self._tslib_round_accessor("round", freq) + +class DatetimeAccessor(Properties): + """Access datetime fields for DataArrays with datetime-like dtypes. + + Fields can be accessed through the `.dt` attribute + for applicable DataArrays. + + Notes + ------ + Note that these fields are not calendar-aware; if your datetimes are encoded + with a non-Gregorian calendar (e.g. a 360-day calendar) using cftime, + then some fields like `dayofyear` may not be accurate. + + Examples + --------- + >>> import xarray as xr + >>> import pandas as pd + >>> dates = pd.date_range(start='2000/01/01', freq='D', periods=10) + >>> ts = xr.DataArray(dates, dims=('time')) + >>> ts + + array(['2000-01-01T00:00:00.000000000', '2000-01-02T00:00:00.000000000', + '2000-01-03T00:00:00.000000000', '2000-01-04T00:00:00.000000000', + '2000-01-05T00:00:00.000000000', '2000-01-06T00:00:00.000000000', + '2000-01-07T00:00:00.000000000', '2000-01-08T00:00:00.000000000', + '2000-01-09T00:00:00.000000000', '2000-01-10T00:00:00.000000000'], + dtype='datetime64[ns]') + Coordinates: + * time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 
2000-01-10 + >>> ts.dt + + >>> ts.dt.dayofyear + + array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + Coordinates: + * time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10 + >>> ts.dt.quarter + + array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) + Coordinates: + * time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10 + + """ + def strftime(self, date_format): ''' Return an array of formatted strings specified by date_format, which @@ -323,3 +300,163 @@ def strftime(self, date_format): return obj_type( result, name="strftime", coords=self._obj.coords, dims=self._obj.dims ) + + year = Properties._tslib_field_accessor( + "year", "The year of the datetime", np.int64 + ) + month = Properties._tslib_field_accessor( + "month", "The month as January=1, December=12", np.int64 + ) + day = Properties._tslib_field_accessor("day", "The days of the datetime", np.int64) + hour = Properties._tslib_field_accessor( + "hour", "The hours of the datetime", np.int64 + ) + minute = Properties._tslib_field_accessor( + "minute", "The minutes of the datetime", np.int64 + ) + second = Properties._tslib_field_accessor( + "second", "The seconds of the datetime", np.int64 + ) + microsecond = Properties._tslib_field_accessor( + "microsecond", "The microseconds of the datetime", np.int64 + ) + nanosecond = Properties._tslib_field_accessor( + "nanosecond", "The nanoseconds of the datetime", np.int64 + ) + weekofyear = Properties._tslib_field_accessor( + "weekofyear", "The week ordinal of the year", np.int64 + ) + week = weekofyear + dayofweek = Properties._tslib_field_accessor( + "dayofweek", "The day of the week with Monday=0, Sunday=6", np.int64 + ) + weekday = dayofweek + + weekday_name = Properties._tslib_field_accessor( + "weekday_name", "The name of day in a week", object + ) + + dayofyear = Properties._tslib_field_accessor( + "dayofyear", "The ordinal day of the year", np.int64 + ) + quarter = Properties._tslib_field_accessor("quarter", "The quarter of the date") + days_in_month = Properties._tslib_field_accessor( + "days_in_month", "The number of days in the month", np.int64 + ) + daysinmonth = days_in_month + + season = Properties._tslib_field_accessor("season", "Season of the year", object) + + time = Properties._tslib_field_accessor( + "time", "Timestamps corresponding to datetimes", object + ) + + is_month_start = Properties._tslib_field_accessor( + "is_month_start", + "Indicates whether the date is the first day of the month.", + bool, + ) + is_month_end = Properties._tslib_field_accessor( + "is_month_end", "Indicates whether the date is the last day of the month.", bool + ) + is_quarter_start = Properties._tslib_field_accessor( + "is_quarter_start", + "Indicator for whether the date is the first day of a quarter.", + bool, + ) + is_quarter_end = Properties._tslib_field_accessor( + "is_quarter_end", + "Indicator for whether the date is the last day of a quarter.", + bool, + ) + is_year_start = Properties._tslib_field_accessor( + "is_year_start", "Indicate whether the date is the first day of a year.", bool + ) + is_year_end = Properties._tslib_field_accessor( + "is_year_end", "Indicate whether the date is the last day of the year.", bool + ) + is_leap_year = Properties._tslib_field_accessor( + "is_leap_year", "Boolean indicator if the date belongs to a leap year.", bool + ) + + +class TimedeltaAccessor(Properties): + """Access Timedelta fields for DataArrays with Timedelta-like dtypes. + + Fields can be accessed through the `.dt` attribute for applicable DataArrays. 
+
+    Examples
+    --------
+    >>> import pandas as pd
+    >>> import xarray as xr
+    >>> dates = pd.timedelta_range(start="1 day", freq="6H", periods=20)
+    >>> ts = xr.DataArray(dates, dims=('time'))
+    >>> ts
+    
+    array([ 86400000000000, 108000000000000, 129600000000000, 151200000000000,
+            172800000000000, 194400000000000, 216000000000000, 237600000000000,
+            259200000000000, 280800000000000, 302400000000000, 324000000000000,
+            345600000000000, 367200000000000, 388800000000000, 410400000000000,
+            432000000000000, 453600000000000, 475200000000000, 496800000000000],
+          dtype='timedelta64[ns]')
+    Coordinates:
+      * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
+    >>> ts.dt
+    
+    >>> ts.dt.days
+    
+    array([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5])
+    Coordinates:
+      * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
+    >>> ts.dt.microseconds
+    
+    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
+    Coordinates:
+      * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
+    >>> ts.dt.seconds
+    
+    array([    0, 21600, 43200, 64800,     0, 21600, 43200, 64800,     0,
+           21600, 43200, 64800,     0, 21600, 43200, 64800,     0, 21600,
+           43200, 64800])
+    Coordinates:
+      * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
+    """
+
+    days = Properties._tslib_field_accessor(
+        "days", "Number of days for each element.", np.int64
+    )
+    seconds = Properties._tslib_field_accessor(
+        "seconds",
+        "Number of seconds (>= 0 and less than 1 day) for each element.",
+        np.int64,
+    )
+    microseconds = Properties._tslib_field_accessor(
+        "microseconds",
+        "Number of microseconds (>= 0 and less than 1 second) for each element.",
+        np.int64,
+    )
+    nanoseconds = Properties._tslib_field_accessor(
+        "nanoseconds",
+        "Number of nanoseconds (>= 0 and less than 1 microsecond) for each element.",
+        np.int64,
+    )
+
+
+class CombinedDatetimelikeAccessor(DatetimeAccessor, TimedeltaAccessor):
+    def __new__(cls, obj):
+        # CombinedDatetimelikeAccessor isn't really instantiated. Instead
+        # we need to choose which parent (datetime or timedelta) is
+        # appropriate. Since we're checking the dtypes anyway, we'll just
+        # do all the validation here.
+        if not _contains_datetime_like_objects(obj):
+            raise TypeError(
+                "'.dt' accessor only available for "
+                "DataArray with datetime64 or timedelta64 dtype or "
+                "for arrays containing cftime datetime "
+                "objects."
+            )
+
+        if is_np_timedelta_like(obj.dtype):
+            return TimedeltaAccessor(obj)
+        else:
+            return DatetimeAccessor(obj)
diff --git a/xarray/core/common.py b/xarray/core/common.py
index a74318b2f90..e908c69dd14 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -1447,6 +1447,12 @@ def is_np_datetime_like(dtype: DTypeLike) -> bool:
     return np.issubdtype(dtype, np.datetime64) or np.issubdtype(dtype, np.timedelta64)
 
 
+def is_np_timedelta_like(dtype: DTypeLike) -> bool:
+    """Check whether dtype is of the timedelta64 dtype.
+ """ + return np.issubdtype(dtype, np.timedelta64) + + def _contains_cftime_datetimes(array) -> bool: """Check if an array contains cftime.datetime objects """ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 31cd3c713f6..31aa4da57b2 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -33,7 +33,7 @@ rolling, utils, ) -from .accessor_dt import DatetimeAccessor +from .accessor_dt import CombinedDatetimelikeAccessor from .accessor_str import StringAccessor from .alignment import ( _broadcast_helper, @@ -258,7 +258,7 @@ class DataArray(AbstractArray, DataWithCoords): _coarsen_cls = rolling.DataArrayCoarsen _resample_cls = resample.DataArrayResample - dt = property(DatetimeAccessor) + dt = property(CombinedDatetimelikeAccessor) def __init__( self, diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 5fe5b8c3f59..67ca12532c7 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -12,6 +12,8 @@ requires_dask, ) +from .test_dask import raise_if_dask_computes, assert_chunks_equal + class TestDatetimeAccessor: @pytest.fixture(autouse=True) @@ -37,24 +39,38 @@ def setup(self): name="data", ) - def test_field_access(self): - years = xr.DataArray( - self.times.year, name="year", coords=[self.times], dims=["time"] - ) - months = xr.DataArray( - self.times.month, name="month", coords=[self.times], dims=["time"] - ) - days = xr.DataArray( - self.times.day, name="day", coords=[self.times], dims=["time"] - ) - hours = xr.DataArray( - self.times.hour, name="hour", coords=[self.times], dims=["time"] + @pytest.mark.parametrize( + "field", + [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "microsecond", + "nanosecond", + "week", + "weekofyear", + "dayofweek", + "weekday", + "dayofyear", + "quarter", + "is_month_start", + "is_month_end", + "is_quarter_start", + "is_quarter_end", + "is_year_start", + "is_year_end", + "is_leap_year", + ], + ) + def test_field_access(self, field): + expected = xr.DataArray( + getattr(self.times, field), name=field, coords=[self.times], dims=["time"] ) - - assert_equal(years, self.data.time.dt.year) - assert_equal(months, self.data.time.dt.month) - assert_equal(days, self.data.time.dt.day) - assert_equal(hours, self.data.time.dt.hour) + actual = getattr(self.data.time.dt, field) + assert_equal(expected, actual) def test_strftime(self): assert ( @@ -69,55 +85,74 @@ def test_not_datetime_type(self): nontime_data.time.dt @requires_dask - def test_dask_field_access(self): + @pytest.mark.parametrize( + "field", + [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "microsecond", + "nanosecond", + "week", + "weekofyear", + "dayofweek", + "weekday", + "dayofyear", + "quarter", + "is_month_start", + "is_month_end", + "is_quarter_start", + "is_quarter_end", + "is_year_start", + "is_year_end", + "is_leap_year", + ], + ) + def test_dask_field_access(self, field): import dask.array as da - years = self.times_data.dt.year - months = self.times_data.dt.month - hours = self.times_data.dt.hour - days = self.times_data.dt.day - floor = self.times_data.dt.floor("D") - ceil = self.times_data.dt.ceil("D") - round = self.times_data.dt.round("D") - strftime = self.times_data.dt.strftime("%Y-%m-%d %H:%M:%S") + expected = getattr(self.times_data.dt, field) + + dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50)) + dask_times_2d = xr.DataArray( + dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data" + ) + + with 
raise_if_dask_computes(): + actual = getattr(dask_times_2d.dt, field) + + assert isinstance(actual.data, da.Array) + assert_chunks_equal(actual, dask_times_2d) + assert_equal(actual.compute(), expected.compute()) + + @requires_dask + @pytest.mark.parametrize( + "method, parameters", + [ + ("floor", "D"), + ("ceil", "D"), + ("round", "D"), + ("strftime", "%Y-%m-%d %H:%M:%S"), + ], + ) + def test_dask_accessor_method(self, method, parameters): + import dask.array as da + expected = getattr(self.times_data.dt, method)(parameters) dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50)) dask_times_2d = xr.DataArray( dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data" ) - dask_year = dask_times_2d.dt.year - dask_month = dask_times_2d.dt.month - dask_day = dask_times_2d.dt.day - dask_hour = dask_times_2d.dt.hour - dask_floor = dask_times_2d.dt.floor("D") - dask_ceil = dask_times_2d.dt.ceil("D") - dask_round = dask_times_2d.dt.round("D") - dask_strftime = dask_times_2d.dt.strftime("%Y-%m-%d %H:%M:%S") - - # Test that the data isn't eagerly evaluated - assert isinstance(dask_year.data, da.Array) - assert isinstance(dask_month.data, da.Array) - assert isinstance(dask_day.data, da.Array) - assert isinstance(dask_hour.data, da.Array) - assert isinstance(dask_strftime.data, da.Array) - - # Double check that outcome chunksize is unchanged - dask_chunks = dask_times_2d.chunks - assert dask_year.data.chunks == dask_chunks - assert dask_month.data.chunks == dask_chunks - assert dask_day.data.chunks == dask_chunks - assert dask_hour.data.chunks == dask_chunks - assert dask_strftime.data.chunks == dask_chunks - - # Check the actual output from the accessors - assert_equal(years, dask_year.compute()) - assert_equal(months, dask_month.compute()) - assert_equal(days, dask_day.compute()) - assert_equal(hours, dask_hour.compute()) - assert_equal(floor, dask_floor.compute()) - assert_equal(ceil, dask_ceil.compute()) - assert_equal(round, dask_round.compute()) - assert_equal(strftime, dask_strftime.compute()) + + with raise_if_dask_computes(): + actual = getattr(dask_times_2d.dt, method)(parameters) + + assert isinstance(actual.data, da.Array) + assert_chunks_equal(actual, dask_times_2d) + assert_equal(actual.compute(), expected.compute()) def test_seasons(self): dates = pd.date_range(start="2000/01/01", freq="M", periods=12) @@ -140,12 +175,108 @@ def test_seasons(self): assert_array_equal(seasons.values, dates.dt.season.values) - def test_rounders(self): + @pytest.mark.parametrize( + "method, parameters", [("floor", "D"), ("ceil", "D"), ("round", "D")] + ) + def test_accessor_method(self, method, parameters): dates = pd.date_range("2014-01-01", "2014-05-01", freq="H") - xdates = xr.DataArray(np.arange(len(dates)), dims=["time"], coords=[dates]) - assert_array_equal(dates.floor("D").values, xdates.time.dt.floor("D").values) - assert_array_equal(dates.ceil("D").values, xdates.time.dt.ceil("D").values) - assert_array_equal(dates.round("D").values, xdates.time.dt.round("D").values) + xdates = xr.DataArray(dates, dims=["time"]) + expected = getattr(dates, method)(parameters) + actual = getattr(xdates.dt, method)(parameters) + assert_array_equal(expected, actual) + + +class TestTimedeltaAccessor: + @pytest.fixture(autouse=True) + def setup(self): + nt = 100 + data = np.random.rand(10, 10, nt) + lons = np.linspace(0, 11, 10) + lats = np.linspace(0, 20, 10) + self.times = pd.timedelta_range(start="1 day", freq="6H", periods=nt) + + self.data = xr.DataArray( + data, + coords=[lons, lats, 
self.times], + dims=["lon", "lat", "time"], + name="data", + ) + + self.times_arr = np.random.choice(self.times, size=(10, 10, nt)) + self.times_data = xr.DataArray( + self.times_arr, + coords=[lons, lats, self.times], + dims=["lon", "lat", "time"], + name="data", + ) + + def test_not_datetime_type(self): + nontime_data = self.data.copy() + int_data = np.arange(len(self.data.time)).astype("int8") + nontime_data["time"].values = int_data + with raises_regex(TypeError, "dt"): + nontime_data.time.dt + + @pytest.mark.parametrize( + "field", ["days", "seconds", "microseconds", "nanoseconds"] + ) + def test_field_access(self, field): + expected = xr.DataArray( + getattr(self.times, field), name=field, coords=[self.times], dims=["time"] + ) + actual = getattr(self.data.time.dt, field) + assert_equal(expected, actual) + + @pytest.mark.parametrize( + "method, parameters", [("floor", "D"), ("ceil", "D"), ("round", "D")] + ) + def test_accessor_methods(self, method, parameters): + dates = pd.timedelta_range(start="1 day", end="30 days", freq="6H") + xdates = xr.DataArray(dates, dims=["time"]) + expected = getattr(dates, method)(parameters) + actual = getattr(xdates.dt, method)(parameters) + assert_array_equal(expected, actual) + + @requires_dask + @pytest.mark.parametrize( + "field", ["days", "seconds", "microseconds", "nanoseconds"] + ) + def test_dask_field_access(self, field): + import dask.array as da + + expected = getattr(self.times_data.dt, field) + + dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50)) + dask_times_2d = xr.DataArray( + dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data" + ) + + with raise_if_dask_computes(): + actual = getattr(dask_times_2d.dt, field) + + assert isinstance(actual.data, da.Array) + assert_chunks_equal(actual, dask_times_2d) + assert_equal(actual, expected) + + @requires_dask + @pytest.mark.parametrize( + "method, parameters", [("floor", "D"), ("ceil", "D"), ("round", "D")] + ) + def test_dask_accessor_method(self, method, parameters): + import dask.array as da + + expected = getattr(self.times_data.dt, method)(parameters) + dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50)) + dask_times_2d = xr.DataArray( + dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data" + ) + + with raise_if_dask_computes(): + actual = getattr(dask_times_2d.dt, method)(parameters) + + assert isinstance(actual.data, da.Array) + assert_chunks_equal(actual, dask_times_2d) + assert_equal(actual.compute(), expected.compute()) _CFTIME_CALENDARS = [ From aaf37381b38310cb11311cac26515b1ecbcf09c1 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 24 Dec 2019 13:37:03 +0000 Subject: [PATCH 25/28] concat keeps attrs from first variable. (#3637) * concat keep attrs from first variable. Fixes #2060 Fixes #2575 --- doc/whats-new.rst | 3 +++ xarray/core/concat.py | 6 ++++-- xarray/core/variable.py | 5 +++-- xarray/tests/test_concat.py | 13 +++++++++++++ xarray/tests/test_variable.py | 8 ++------ 5 files changed, 25 insertions(+), 10 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index fe05a4d2c21..69f9fd7f02c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,9 @@ Breaking changes New Features ~~~~~~~~~~~~ +- :py:func:`xarray.concat` now preserves attributes from the first Variable. + (:issue:`2575`, :issue:`2060`, :issue:`1614`) + By `Deepak Cherian `_. - :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile` and ``GroupBy.quantile`` now work with dask Variables. By `Deepak Cherian `_. 
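To make the attribute handling described in the entry above concrete, here is a minimal
sketch (not part of the patch itself) of the behavior once this change is applied; the
array names are illustrative only:

    import numpy as np
    import xarray as xr

    # Two inputs whose attrs conflict. Before this patch, conflicting attrs
    # were silently dropped from the result; afterwards the result simply
    # keeps the attrs of the first input.
    a = xr.DataArray(np.zeros(3), dims="x", attrs={"units": "m"})
    b = xr.DataArray(np.ones(3), dims="x", attrs={"units": "km"})

    combined = xr.concat([a, b], dim="x")
    assert combined.attrs == {"units": "m"}  # attrs taken from the first object

This mirrors the new ``test_concat_attrs_first_variable`` test below.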
diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 5ccbfa3f2b4..302f7afcec6 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -93,12 +93,14 @@ def concat( those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. - indexers, mode, concat_over : deprecated - Returns ------- concatenated : type of objs + Notes + ----- + Each concatenated Variable preserves corresponding ``attrs`` from the first element of ``objs``. + See also -------- merge diff --git a/xarray/core/variable.py b/xarray/core/variable.py index cb00c9dcfe0..0a9d0767b77 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1625,8 +1625,9 @@ def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False): if not shortcut: for var in variables: if var.dims != first_var.dims: - raise ValueError("inconsistent dimensions") - utils.remove_incompatible_items(attrs, var.attrs) + raise ValueError( + f"Variable has dimensions {list(var.dims)} but first Variable has dimensions {list(first_var.dims)}" + ) return cls(dims, data, attrs, encoding) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 0661ebb7a38..def5abc942f 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -462,3 +462,16 @@ def test_concat_join_kwarg(self): for join in expected: actual = concat([ds1, ds2], join=join, dim="x") assert_equal(actual, expected[join].to_array()) + + +@pytest.mark.parametrize("attr1", ({"a": {"meta": [10, 20, 30]}}, {"a": [1, 2, 3]}, {})) +@pytest.mark.parametrize("attr2", ({"a": [1, 2, 3]}, {})) +def test_concat_attrs_first_variable(attr1, attr2): + + arrs = [ + DataArray([[1], [2]], dims=["x", "y"], attrs=attr1), + DataArray([[3], [4]], dims=["x", "y"], attrs=attr2), + ] + + concat_attrs = concat(arrs, "y").attrs + assert concat_attrs == attr1 diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 49a6906d5be..62fde920b1e 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -432,7 +432,7 @@ def test_concat(self): assert_identical( Variable(["b", "a"], np.array([x, y])), Variable.concat((v, w), "b") ) - with raises_regex(ValueError, "inconsistent dimensions"): + with raises_regex(ValueError, "Variable has dimensions"): Variable.concat([v, Variable(["c"], y)], "b") # test indexers actual = Variable.concat( @@ -451,16 +451,12 @@ def test_concat(self): Variable.concat([v[:, 0], v[:, 1:]], "x") def test_concat_attrs(self): - # different or conflicting attributes should be removed + # always keep attrs from first variable v = self.cls("a", np.arange(5), {"foo": "bar"}) w = self.cls("a", np.ones(5)) expected = self.cls( "a", np.concatenate([np.arange(5), np.ones(5)]) ).to_base_variable() - assert_identical(expected, Variable.concat([v, w], "a")) - w.attrs["foo"] = 2 - assert_identical(expected, Variable.concat([v, w], "a")) - w.attrs["foo"] = "bar" expected.attrs["foo"] = "bar" assert_identical(expected, Variable.concat([v, w], "a")) From 651f27fd5176674da315501026dc18a03b575a76 Mon Sep 17 00:00:00 2001 From: Ian Bolliger Date: Tue, 24 Dec 2019 05:46:28 -0800 Subject: [PATCH 26/28] Allow incomplete hypercubes in combine_by_coords (#3649) * allow incomplete hypercubes in combine_by_coords * If fill_value=None then still requires complete hypercube * Closes #3648 --- doc/whats-new.rst | 3 +++ xarray/core/combine.py | 36 +++++++++++++++++++++++++++++------- xarray/tests/test_combine.py | 16 ++++++++++++++++ 3 files changed, 48 
insertions(+), 7 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 69f9fd7f02c..f08fad12bba 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -41,6 +41,9 @@ New Features Bug fixes ~~~~~~~~~ +- Fix :py:meth:`xarray.combine_by_coords` to allow for combining incomplete + hypercubes of Datasets (:issue:`3648`). By `Ian Bolliger + `_. - Fix :py:meth:`xarray.combine_by_coords` when combining cftime coordinates which span long time intervals (:issue:`3535`). By `Spencer Clark `_. diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 65087b05cc0..3f6e0e79351 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -115,11 +115,12 @@ def _infer_concat_order_from_coords(datasets): return combined_ids, concat_dims -def _check_shape_tile_ids(combined_tile_ids): +def _check_dimension_depth_tile_ids(combined_tile_ids): + """ + Check all tuples are the same length, i.e. check that all lists are + nested to the same depth. + """ tile_ids = combined_tile_ids.keys() - - # Check all tuples are the same length - # i.e. check that all lists are nested to the same depth nesting_depths = [len(tile_id) for tile_id in tile_ids] if not nesting_depths: nesting_depths = [0] @@ -128,8 +129,13 @@ def _check_shape_tile_ids(combined_tile_ids): "The supplied objects do not form a hypercube because" " sub-lists do not have consistent depths" ) + # return these just to be reused in _check_shape_tile_ids + return tile_ids, nesting_depths - # Check all lists along one dimension are same length + +def _check_shape_tile_ids(combined_tile_ids): + """Check all lists along one dimension are same length.""" + tile_ids, nesting_depths = _check_dimension_depth_tile_ids(combined_tile_ids) for dim in range(nesting_depths[0]): indices_along_dim = [tile_id[dim] for tile_id in tile_ids] occurrences = Counter(indices_along_dim) @@ -536,7 +542,8 @@ def combine_by_coords( coords : {'minimal', 'different', 'all' or list of str}, optional As per the 'data_vars' kwarg, but for coordinate variables. fill_value : scalar, optional - Value to use for newly missing values + Value to use for newly missing values. If None, raises a ValueError if + the passed Datasets do not create a complete hypercube. join : {'outer', 'inner', 'left', 'right', 'exact'}, optional String indicating how to combine differing indexes (excluding concat_dim) in objects @@ -653,6 +660,15 @@ def combine_by_coords( temperature (y, x) float64 1.654 10.63 7.015 2.543 ... 12.46 2.22 15.96 precipitation (y, x) float64 0.2136 0.9974 0.7603 ... 0.6125 0.4654 0.5953 + >>> xr.combine_by_coords([x1, x2, x3]) + + Dimensions: (x: 6, y: 4) + Coordinates: + * x (x) int64 10 20 30 40 50 60 + * y (y) int64 0 1 2 3 + Data variables: + temperature (y, x) float64 1.654 10.63 7.015 nan ... 12.46 2.22 15.96 + precipitation (y, x) float64 0.2136 0.9974 0.7603 ... 
0.6125 0.4654 0.5953 """ # Group by data vars @@ -667,7 +683,13 @@ def combine_by_coords( list(datasets_with_same_vars) ) - _check_shape_tile_ids(combined_ids) + if fill_value is None: + # check that datasets form complete hypercube + _check_shape_tile_ids(combined_ids) + else: + # check only that all datasets have same dimension depth for these + # vars + _check_dimension_depth_tile_ids(combined_ids) # Concatenate along all of concat_dims one by one to create single ds concatenated = _combine_nd( diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index a29fe0190cf..d907e1c5e46 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -711,6 +711,22 @@ def test_check_for_impossible_ordering(self): ): combine_by_coords([ds1, ds0]) + def test_combine_by_coords_incomplete_hypercube(self): + # test that this succeeds with default fill_value + x1 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [0]}) + x2 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [1], "x": [0]}) + x3 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [1]}) + actual = combine_by_coords([x1, x2, x3]) + expected = Dataset( + {"a": (("y", "x"), [[1, 1], [1, np.nan]])}, + coords={"y": [0, 1], "x": [0, 1]}, + ) + assert_identical(expected, actual) + + # test that this fails if fill_value is None + with pytest.raises(ValueError): + combine_by_coords([x1, x2, x3], fill_value=None) + @pytest.mark.filterwarnings( "ignore:In xarray version 0.15 `auto_combine` " "will be deprecated" From cc22f41b383cfd3d6df8b95a61893869ff89e855 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20BRIOL?= Date: Sun, 29 Dec 2019 15:52:45 +0100 Subject: [PATCH 27/28] added pyinterp to related projects (#3655) added pyinterp to related projects --- doc/related-projects.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/related-projects.rst b/doc/related-projects.rst index a8af05f3074..3188751366f 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -25,6 +25,7 @@ Geosciences - `PyGDX `_: Python 3 package for accessing data stored in GAMS Data eXchange (GDX) files. Also uses a custom subclass. +- `pyinterp `_: Python 3 package for interpolating geo-referenced data used in the field of geosciences. - `pyXpcm `_: xarray-based Profile Classification Modelling (PCM), mostly for ocean data. - `Regionmask `_: plotting and creation of masks of spatial regions - `rioxarray `_: geospatial xarray extension powered by rasterio From b3d3b4480b7fb63402eb6c02103bb8d6c7dbf93a Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 30 Dec 2019 17:46:43 +0000 Subject: [PATCH 28/28] Add nanmedian for dask arrays (#3604) * Add nanmedian for dask arrays Close #2999 * Fix tests. * fix import * Make sure that we don't rechunk the entire variable to one chunk by reducing over all dimensions. Dask raises an error when axis=None but not when axis=range(a.ndim). * fix tests. * Update whats-new.rst --- doc/whats-new.rst | 3 ++ xarray/core/dask_array_compat.py | 83 +++++++++++++++++++++++++++++++- xarray/core/duck_array_ops.py | 8 +-- xarray/core/nanops.py | 12 ++++- xarray/tests/test_dask.py | 4 +- 5 files changed, 102 insertions(+), 8 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f08fad12bba..00d1c50780e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,9 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Implement :py:func:`median` and :py:func:`nanmedian` for dask arrays. 
This works by rechunking + to a single chunk along all reduction axes. (:issue:`2999`). + By `Deepak Cherian `_. - :py:func:`xarray.concat` now preserves attributes from the first Variable. (:issue:`2575`, :issue:`2060`, :issue:`1614`) By `Deepak Cherian `_. diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index c3dbdd27098..de55de89f0c 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -1,8 +1,14 @@ from distutils.version import LooseVersion +from typing import Iterable -import dask.array as da import numpy as np -from dask import __version__ as dask_version + +try: + import dask.array as da + from dask import __version__ as dask_version +except ImportError: + dask_version = "0.0.0" + da = None if LooseVersion(dask_version) >= LooseVersion("2.0.0"): meta_from_array = da.utils.meta_from_array @@ -89,3 +95,76 @@ def meta_from_array(x, ndim=None, dtype=None): meta = meta.astype(dtype) return meta + + +if LooseVersion(dask_version) >= LooseVersion("2.8.1"): + median = da.median +else: + # Copied from dask v2.8.1 + # Used under the terms of Dask's license, see licenses/DASK_LICENSE. + def median(a, axis=None, keepdims=False): + """ + This works by automatically chunking the reduced axes to a single chunk + and then calling ``numpy.median`` function across the remaining dimensions + """ + + if axis is None: + raise NotImplementedError( + "The da.median function only works along an axis. " + "The full algorithm is difficult to do in parallel" + ) + + if not isinstance(axis, Iterable): + axis = (axis,) + + axis = [ax + a.ndim if ax < 0 else ax for ax in axis] + + a = a.rechunk({ax: -1 if ax in axis else "auto" for ax in range(a.ndim)}) + + result = a.map_blocks( + np.median, + axis=axis, + keepdims=keepdims, + drop_axis=axis if not keepdims else None, + chunks=[1 if ax in axis else c for ax, c in enumerate(a.chunks)] + if keepdims + else None, + ) + + return result + + +if LooseVersion(dask_version) > LooseVersion("2.9.0"): + nanmedian = da.nanmedian +else: + + def nanmedian(a, axis=None, keepdims=False): + """ + This works by automatically chunking the reduced axes to a single chunk + and then calling ``numpy.nanmedian`` function across the remaining dimensions + """ + + if axis is None: + raise NotImplementedError( + "The da.nanmedian function only works along an axis. " + "The full algorithm is difficult to do in parallel" + ) + + if not isinstance(axis, Iterable): + axis = (axis,) + + axis = [ax + a.ndim if ax < 0 else ax for ax in axis] + + a = a.rechunk({ax: -1 if ax in axis else "auto" for ax in range(a.ndim)}) + + result = a.map_blocks( + np.nanmedian, + axis=axis, + keepdims=keepdims, + drop_axis=axis if not keepdims else None, + chunks=[1 if ax in axis else c for ax, c in enumerate(a.chunks)] + if keepdims + else None, + ) + + return result diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index cf616acb485..98b371ab7c3 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -11,7 +11,7 @@ import numpy as np import pandas as pd -from . import dask_array_ops, dtypes, npcompat, nputils +from . import dask_array_ops, dask_array_compat, dtypes, npcompat, nputils from .nputils import nanfirst, nanlast from .pycompat import dask_array_type @@ -284,7 +284,7 @@ def _ignore_warnings_if(condition): yield -def _create_nan_agg_method(name, coerce_strings=False): +def _create_nan_agg_method(name, dask_module=dask_array, coerce_strings=False): from . 
import nanops def f(values, axis=None, skipna=None, **kwargs): @@ -301,7 +301,7 @@ def f(values, axis=None, skipna=None, **kwargs): nanname = "nan" + name func = getattr(nanops, nanname) else: - func = _dask_or_eager_func(name) + func = _dask_or_eager_func(name, dask_module=dask_module) try: return func(values, axis=axis, **kwargs) @@ -337,7 +337,7 @@ def f(values, axis=None, skipna=None, **kwargs): std.numeric_only = True var = _create_nan_agg_method("var") var.numeric_only = True -median = _create_nan_agg_method("median") +median = _create_nan_agg_method("median", dask_module=dask_array_compat) median.numeric_only = True prod = _create_nan_agg_method("prod") prod.numeric_only = True diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index f70e96217e8..f9989c2c8c9 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -6,8 +6,10 @@ try: import dask.array as dask_array + from . import dask_array_compat except ImportError: dask_array = None + dask_array_compat = None # type: ignore def _replace_nan(a, val): @@ -141,7 +143,15 @@ def nanmean(a, axis=None, dtype=None, out=None): def nanmedian(a, axis=None, out=None): - return _dask_or_eager_func("nanmedian", eager_module=nputils)(a, axis=axis) + # The dask algorithm works by rechunking to one chunk along axis + # Make sure we trigger the dask error when passing all dimensions + # so that we don't rechunk the entire array to one chunk and + # possibly blow memory + if axis is not None and len(np.atleast_1d(axis)) == a.ndim: + axis = None + return _dask_or_eager_func( + "nanmedian", dask_module=dask_array_compat, eager_module=nputils + )(a, axis=axis) def _nanvar_object(value, axis=None, ddof=0, keepdims=False, **kwargs): diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 6122e987154..d0e2654eed3 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -216,8 +216,10 @@ def test_reduce(self): self.assertLazyAndAllClose(u.argmin(dim="x"), actual) self.assertLazyAndAllClose((u > 1).any(), (v > 1).any()) self.assertLazyAndAllClose((u < 1).all("x"), (v < 1).all("x")) - with raises_regex(NotImplementedError, "dask"): + with raises_regex(NotImplementedError, "only works along an axis"): v.median() + with raises_regex(NotImplementedError, "only works along an axis"): + v.median(v.dims) with raise_if_dask_computes(): v.reduce(duck_array_ops.mean)
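As a closing usage sketch of the dask ``median``/``nanmedian`` support added in this
patch, assuming xarray and dask builds that include the change:

    import dask.array as da
    import numpy as np
    import xarray as xr

    arr = xr.DataArray(
        da.from_array(np.arange(12.0).reshape(3, 4), chunks=(1, 2)),
        dims=("x", "y"),
    )

    # Reducing along a named dimension stays lazy; the reduced axis is
    # rechunked to a single chunk under the hood before numpy's median
    # runs per block.
    lazy = arr.median("y")
    print(lazy.compute().values)  # [1.5 5.5 9.5]

    # Reducing over all dimensions at once (e.g. arr.median()) still raises
    # NotImplementedError, to avoid rechunking the entire array to one chunk.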