From c162ba7cb5619a81462b7cf9fc2fc4823e91285d Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 16 Jun 2021 09:42:18 -0600 Subject: [PATCH 01/12] Refactor out coarsen tests --- xarray/tests/test_coarsen.py | 301 +++++++++++++++++++++++++++++++++ xarray/tests/test_dataarray.py | 101 ----------- xarray/tests/test_dataset.py | 190 --------------------- 3 files changed, 301 insertions(+), 291 deletions(-) create mode 100644 xarray/tests/test_coarsen.py diff --git a/xarray/tests/test_coarsen.py b/xarray/tests/test_coarsen.py new file mode 100644 index 00000000000..ca4725a579f --- /dev/null +++ b/xarray/tests/test_coarsen.py @@ -0,0 +1,301 @@ +import numpy as np +import pandas as pd +import pytest + +import xarray as xr +from xarray import DataArray, Dataset, set_options + +from . import assert_allclose, assert_equal, has_dask, requires_cftime +from .test_dataarray import da +from .test_dataset import ds + + +def test_coarsen_absent_dims_error(ds): + with pytest.raises(ValueError, match=r"not found in Dataset."): + ds.coarsen(foo=2) + + +@pytest.mark.parametrize("dask", [True, False]) +@pytest.mark.parametrize(("boundary", "side"), [("trim", "left"), ("pad", "right")]) +def test_coarsen_dataset(ds, dask, boundary, side): + if dask and has_dask: + ds = ds.chunk({"x": 4}) + + actual = ds.coarsen(time=2, x=3, boundary=boundary, side=side).max() + assert_equal( + actual["z1"], ds["z1"].coarsen(x=3, boundary=boundary, side=side).max() + ) + # coordinate should be mean by default + assert_equal( + actual["time"], ds["time"].coarsen(time=2, boundary=boundary, side=side).mean() + ) + + +@pytest.mark.parametrize("dask", [True, False]) +def test_coarsen_coords(ds, dask): + if dask and has_dask: + ds = ds.chunk({"x": 4}) + + # check if coord_func works + actual = ds.coarsen(time=2, x=3, boundary="trim", coord_func={"time": "max"}).max() + assert_equal(actual["z1"], ds["z1"].coarsen(x=3, boundary="trim").max()) + assert_equal(actual["time"], ds["time"].coarsen(time=2, 
boundary="trim").max()) + + # raise if exact + with pytest.raises(ValueError): + ds.coarsen(x=3).mean() + # should be no error + ds.isel(x=slice(0, 3 * (len(ds["x"]) // 3))).coarsen(x=3).mean() + + # working test with pd.time + da = xr.DataArray( + np.linspace(0, 365, num=364), + dims="time", + coords={"time": pd.date_range("15/12/1999", periods=364)}, + ) + actual = da.coarsen(time=2).mean() + + +@requires_cftime +def test_coarsen_coords_cftime(): + times = xr.cftime_range("2000", periods=6) + da = xr.DataArray(range(6), [("time", times)]) + actual = da.coarsen(time=3).mean() + expected_times = xr.cftime_range("2000-01-02", freq="3D", periods=2) + np.testing.assert_array_equal(actual.time, expected_times) + + +@pytest.mark.parametrize( + "funcname, argument", + [ + ("reduce", (np.mean,)), + ("mean", ()), + ], +) +def test_coarsen_keep_attrs(funcname, argument): + global_attrs = {"units": "test", "long_name": "testing"} + da_attrs = {"da_attr": "test"} + attrs_coords = {"attrs_coords": "test"} + da_not_coarsend_attrs = {"da_not_coarsend_attr": "test"} + + data = np.linspace(10, 15, 100) + coords = np.linspace(1, 10, 100) + + ds = Dataset( + data_vars={ + "da": ("coord", data, da_attrs), + "da_not_coarsend": ("no_coord", data, da_not_coarsend_attrs), + }, + coords={"coord": ("coord", coords, attrs_coords)}, + attrs=global_attrs, + ) + + # attrs are now kept per default + func = getattr(ds.coarsen(dim={"coord": 5}), funcname) + result = func(*argument) + assert result.attrs == global_attrs + assert result.da.attrs == da_attrs + assert result.da_not_coarsend.attrs == da_not_coarsend_attrs + assert result.coord.attrs == attrs_coords + assert result.da.name == "da" + assert result.da_not_coarsend.name == "da_not_coarsend" + + # discard attrs + func = getattr(ds.coarsen(dim={"coord": 5}), funcname) + result = func(*argument, keep_attrs=False) + assert result.attrs == {} + assert result.da.attrs == {} + assert result.da_not_coarsend.attrs == {} + assert result.coord.attrs 
== {} + assert result.da.name == "da" + assert result.da_not_coarsend.name == "da_not_coarsend" + + # test discard attrs using global option + func = getattr(ds.coarsen(dim={"coord": 5}), funcname) + with set_options(keep_attrs=False): + result = func(*argument) + + assert result.attrs == {} + assert result.da.attrs == {} + assert result.da_not_coarsend.attrs == {} + assert result.coord.attrs == {} + assert result.da.name == "da" + assert result.da_not_coarsend.name == "da_not_coarsend" + + # keyword takes precedence over global option + func = getattr(ds.coarsen(dim={"coord": 5}), funcname) + with set_options(keep_attrs=False): + result = func(*argument, keep_attrs=True) + + assert result.attrs == global_attrs + assert result.da.attrs == da_attrs + assert result.da_not_coarsend.attrs == da_not_coarsend_attrs + assert result.coord.attrs == attrs_coords + assert result.da.name == "da" + assert result.da_not_coarsend.name == "da_not_coarsend" + + func = getattr(ds.coarsen(dim={"coord": 5}), funcname) + with set_options(keep_attrs=True): + result = func(*argument, keep_attrs=False) + + assert result.attrs == {} + assert result.da.attrs == {} + assert result.da_not_coarsend.attrs == {} + assert result.coord.attrs == {} + assert result.da.name == "da" + assert result.da_not_coarsend.name == "da_not_coarsend" + + +def test_coarsen_keep_attrs_deprecated(): + global_attrs = {"units": "test", "long_name": "testing"} + attrs_da = {"da_attr": "test"} + + data = np.linspace(10, 15, 100) + coords = np.linspace(1, 10, 100) + + ds = Dataset( + data_vars={"da": ("coord", data)}, + coords={"coord": coords}, + attrs=global_attrs, + ) + ds.da.attrs = attrs_da + + # deprecated option + with pytest.warns( + FutureWarning, match="Passing ``keep_attrs`` to ``coarsen`` is deprecated" + ): + result = ds.coarsen(dim={"coord": 5}, keep_attrs=False).mean() + + assert result.attrs == {} + assert result.da.attrs == {} + + # the keep_attrs in the reduction function takes precedence + with 
pytest.warns( + FutureWarning, match="Passing ``keep_attrs`` to ``coarsen`` is deprecated" + ): + result = ds.coarsen(dim={"coord": 5}, keep_attrs=True).mean(keep_attrs=False) + + assert result.attrs == {} + assert result.da.attrs == {} + + +@pytest.mark.slow +@pytest.mark.parametrize("ds", (1, 2), indirect=True) +@pytest.mark.parametrize("window", (1, 2, 3, 4)) +@pytest.mark.parametrize("name", ("sum", "mean", "std", "var", "min", "max", "median")) +def test_coarsen_reduce(ds, window, name): + # Use boundary="trim" to accomodate all window sizes used in tests + coarsen_obj = ds.coarsen(time=window, boundary="trim") + + # add nan prefix to numpy methods to get similar behavior as bottleneck + actual = coarsen_obj.reduce(getattr(np, f"nan{name}")) + expected = getattr(coarsen_obj, name)() + assert_allclose(actual, expected) + + # make sure the order of data_var are not changed. + assert list(ds.data_vars.keys()) == list(actual.data_vars.keys()) + + # Make sure the dimension order is restored + for key, src_var in ds.data_vars.items(): + assert src_var.dims == actual[key].dims + + +@pytest.mark.parametrize( + "funcname, argument", + [ + ("reduce", (np.mean,)), + ("mean", ()), + ], +) +def test_coarsen_da_keep_attrs(funcname, argument): + attrs_da = {"da_attr": "test"} + attrs_coords = {"attrs_coords": "test"} + + data = np.linspace(10, 15, 100) + coords = np.linspace(1, 10, 100) + + da = DataArray( + data, + dims=("coord"), + coords={"coord": ("coord", coords, attrs_coords)}, + attrs=attrs_da, + name="name", + ) + + # attrs are now kept per default + func = getattr(da.coarsen(dim={"coord": 5}), funcname) + result = func(*argument) + assert result.attrs == attrs_da + da.coord.attrs == attrs_coords + assert result.name == "name" + + # discard attrs + func = getattr(da.coarsen(dim={"coord": 5}), funcname) + result = func(*argument, keep_attrs=False) + assert result.attrs == {} + da.coord.attrs == {} + assert result.name == "name" + + # test discard attrs using global 
option + func = getattr(da.coarsen(dim={"coord": 5}), funcname) + with set_options(keep_attrs=False): + result = func(*argument) + assert result.attrs == {} + da.coord.attrs == {} + assert result.name == "name" + + # keyword takes precedence over global option + func = getattr(da.coarsen(dim={"coord": 5}), funcname) + with set_options(keep_attrs=False): + result = func(*argument, keep_attrs=True) + assert result.attrs == attrs_da + da.coord.attrs == {} + assert result.name == "name" + + func = getattr(da.coarsen(dim={"coord": 5}), funcname) + with set_options(keep_attrs=True): + result = func(*argument, keep_attrs=False) + assert result.attrs == {} + da.coord.attrs == {} + assert result.name == "name" + + +def test_coarsen_da_keep_attrs_deprecated(): + attrs_da = {"da_attr": "test"} + + data = np.linspace(10, 15, 100) + coords = np.linspace(1, 10, 100) + + da = DataArray(data, dims=("coord"), coords={"coord": coords}, attrs=attrs_da) + + # deprecated option + with pytest.warns( + FutureWarning, match="Passing ``keep_attrs`` to ``coarsen`` is deprecated" + ): + result = da.coarsen(dim={"coord": 5}, keep_attrs=False).mean() + + assert result.attrs == {} + + # the keep_attrs in the reduction function takes precedence + with pytest.warns( + FutureWarning, match="Passing ``keep_attrs`` to ``coarsen`` is deprecated" + ): + result = da.coarsen(dim={"coord": 5}, keep_attrs=True).mean(keep_attrs=False) + + assert result.attrs == {} + + +@pytest.mark.parametrize("da", (1, 2), indirect=True) +@pytest.mark.parametrize("window", (1, 2, 3, 4)) +@pytest.mark.parametrize("name", ("sum", "mean", "std", "max")) +def test_coarsen_da_reduce(da, window, name): + if da.isnull().sum() > 1 and window == 1: + pytest.skip("These parameters lead to all-NaN slices") + + # Use boundary="trim" to accomodate all window sizes used in tests + coarsen_obj = da.coarsen(time=window, boundary="trim") + + # add nan prefix to numpy methods to get similar # behavior as bottleneck + actual = 
coarsen_obj.reduce(getattr(np, f"nan{name}")) + expected = getattr(coarsen_obj, name)() + assert_allclose(actual, expected) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 95b6036712c..8a82c8c37f3 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -6496,107 +6496,6 @@ def test_isin(da): assert_equal(result, expected) -@pytest.mark.parametrize( - "funcname, argument", - [ - ("reduce", (np.mean,)), - ("mean", ()), - ], -) -def test_coarsen_keep_attrs(funcname, argument): - attrs_da = {"da_attr": "test"} - attrs_coords = {"attrs_coords": "test"} - - data = np.linspace(10, 15, 100) - coords = np.linspace(1, 10, 100) - - da = DataArray( - data, - dims=("coord"), - coords={"coord": ("coord", coords, attrs_coords)}, - attrs=attrs_da, - name="name", - ) - - # attrs are now kept per default - func = getattr(da.coarsen(dim={"coord": 5}), funcname) - result = func(*argument) - assert result.attrs == attrs_da - da.coord.attrs == attrs_coords - assert result.name == "name" - - # discard attrs - func = getattr(da.coarsen(dim={"coord": 5}), funcname) - result = func(*argument, keep_attrs=False) - assert result.attrs == {} - da.coord.attrs == {} - assert result.name == "name" - - # test discard attrs using global option - func = getattr(da.coarsen(dim={"coord": 5}), funcname) - with set_options(keep_attrs=False): - result = func(*argument) - assert result.attrs == {} - da.coord.attrs == {} - assert result.name == "name" - - # keyword takes precedence over global option - func = getattr(da.coarsen(dim={"coord": 5}), funcname) - with set_options(keep_attrs=False): - result = func(*argument, keep_attrs=True) - assert result.attrs == attrs_da - da.coord.attrs == {} - assert result.name == "name" - - func = getattr(da.coarsen(dim={"coord": 5}), funcname) - with set_options(keep_attrs=True): - result = func(*argument, keep_attrs=False) - assert result.attrs == {} - da.coord.attrs == {} - assert result.name == "name" - - 
-def test_coarsen_keep_attrs_deprecated(): - attrs_da = {"da_attr": "test"} - - data = np.linspace(10, 15, 100) - coords = np.linspace(1, 10, 100) - - da = DataArray(data, dims=("coord"), coords={"coord": coords}, attrs=attrs_da) - - # deprecated option - with pytest.warns( - FutureWarning, match="Passing ``keep_attrs`` to ``coarsen`` is deprecated" - ): - result = da.coarsen(dim={"coord": 5}, keep_attrs=False).mean() - - assert result.attrs == {} - - # the keep_attrs in the reduction function takes precedence - with pytest.warns( - FutureWarning, match="Passing ``keep_attrs`` to ``coarsen`` is deprecated" - ): - result = da.coarsen(dim={"coord": 5}, keep_attrs=True).mean(keep_attrs=False) - - assert result.attrs == {} - - -@pytest.mark.parametrize("da", (1, 2), indirect=True) -@pytest.mark.parametrize("window", (1, 2, 3, 4)) -@pytest.mark.parametrize("name", ("sum", "mean", "std", "max")) -def test_coarsen_reduce(da, window, name): - if da.isnull().sum() > 1 and window == 1: - pytest.skip("These parameters lead to all-NaN slices") - - # Use boundary="trim" to accomodate all window sizes used in tests - coarsen_obj = da.coarsen(time=window, boundary="trim") - - # add nan prefix to numpy methods to get similar # behavior as bottleneck - actual = coarsen_obj.reduce(getattr(np, f"nan{name}")) - expected = getattr(coarsen_obj, name)() - assert_allclose(actual, expected) - - @pytest.mark.parametrize("da", (1, 2), indirect=True) def test_rolling_iter(da): rolling_obj = da.rolling(time=7) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 6c7cebce257..806911f6b1d 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6221,196 +6221,6 @@ def ds(request, backend): return ds -def test_coarsen_absent_dims_error(ds): - with pytest.raises(ValueError, match=r"not found in Dataset."): - ds.coarsen(foo=2) - - -@pytest.mark.parametrize("dask", [True, False]) -@pytest.mark.parametrize(("boundary", "side"), [("trim", "left"), 
("pad", "right")]) -def test_coarsen(ds, dask, boundary, side): - if dask and has_dask: - ds = ds.chunk({"x": 4}) - - actual = ds.coarsen(time=2, x=3, boundary=boundary, side=side).max() - assert_equal( - actual["z1"], ds["z1"].coarsen(x=3, boundary=boundary, side=side).max() - ) - # coordinate should be mean by default - assert_equal( - actual["time"], ds["time"].coarsen(time=2, boundary=boundary, side=side).mean() - ) - - -@pytest.mark.parametrize("dask", [True, False]) -def test_coarsen_coords(ds, dask): - if dask and has_dask: - ds = ds.chunk({"x": 4}) - - # check if coord_func works - actual = ds.coarsen(time=2, x=3, boundary="trim", coord_func={"time": "max"}).max() - assert_equal(actual["z1"], ds["z1"].coarsen(x=3, boundary="trim").max()) - assert_equal(actual["time"], ds["time"].coarsen(time=2, boundary="trim").max()) - - # raise if exact - with pytest.raises(ValueError): - ds.coarsen(x=3).mean() - # should be no error - ds.isel(x=slice(0, 3 * (len(ds["x"]) // 3))).coarsen(x=3).mean() - - # working test with pd.time - da = xr.DataArray( - np.linspace(0, 365, num=364), - dims="time", - coords={"time": pd.date_range("15/12/1999", periods=364)}, - ) - actual = da.coarsen(time=2).mean() - - -@requires_cftime -def test_coarsen_coords_cftime(): - times = xr.cftime_range("2000", periods=6) - da = xr.DataArray(range(6), [("time", times)]) - actual = da.coarsen(time=3).mean() - expected_times = xr.cftime_range("2000-01-02", freq="3D", periods=2) - np.testing.assert_array_equal(actual.time, expected_times) - - -@pytest.mark.parametrize( - "funcname, argument", - [ - ("reduce", (np.mean,)), - ("mean", ()), - ], -) -def test_coarsen_keep_attrs(funcname, argument): - global_attrs = {"units": "test", "long_name": "testing"} - da_attrs = {"da_attr": "test"} - attrs_coords = {"attrs_coords": "test"} - da_not_coarsend_attrs = {"da_not_coarsend_attr": "test"} - - data = np.linspace(10, 15, 100) - coords = np.linspace(1, 10, 100) - - ds = Dataset( - data_vars={ - "da": 
("coord", data, da_attrs), - "da_not_coarsend": ("no_coord", data, da_not_coarsend_attrs), - }, - coords={"coord": ("coord", coords, attrs_coords)}, - attrs=global_attrs, - ) - - # attrs are now kept per default - func = getattr(ds.coarsen(dim={"coord": 5}), funcname) - result = func(*argument) - assert result.attrs == global_attrs - assert result.da.attrs == da_attrs - assert result.da_not_coarsend.attrs == da_not_coarsend_attrs - assert result.coord.attrs == attrs_coords - assert result.da.name == "da" - assert result.da_not_coarsend.name == "da_not_coarsend" - - # discard attrs - func = getattr(ds.coarsen(dim={"coord": 5}), funcname) - result = func(*argument, keep_attrs=False) - assert result.attrs == {} - assert result.da.attrs == {} - assert result.da_not_coarsend.attrs == {} - assert result.coord.attrs == {} - assert result.da.name == "da" - assert result.da_not_coarsend.name == "da_not_coarsend" - - # test discard attrs using global option - func = getattr(ds.coarsen(dim={"coord": 5}), funcname) - with set_options(keep_attrs=False): - result = func(*argument) - - assert result.attrs == {} - assert result.da.attrs == {} - assert result.da_not_coarsend.attrs == {} - assert result.coord.attrs == {} - assert result.da.name == "da" - assert result.da_not_coarsend.name == "da_not_coarsend" - - # keyword takes precedence over global option - func = getattr(ds.coarsen(dim={"coord": 5}), funcname) - with set_options(keep_attrs=False): - result = func(*argument, keep_attrs=True) - - assert result.attrs == global_attrs - assert result.da.attrs == da_attrs - assert result.da_not_coarsend.attrs == da_not_coarsend_attrs - assert result.coord.attrs == attrs_coords - assert result.da.name == "da" - assert result.da_not_coarsend.name == "da_not_coarsend" - - func = getattr(ds.coarsen(dim={"coord": 5}), funcname) - with set_options(keep_attrs=True): - result = func(*argument, keep_attrs=False) - - assert result.attrs == {} - assert result.da.attrs == {} - assert 
result.da_not_coarsend.attrs == {} - assert result.coord.attrs == {} - assert result.da.name == "da" - assert result.da_not_coarsend.name == "da_not_coarsend" - - -def test_coarsen_keep_attrs_deprecated(): - global_attrs = {"units": "test", "long_name": "testing"} - attrs_da = {"da_attr": "test"} - - data = np.linspace(10, 15, 100) - coords = np.linspace(1, 10, 100) - - ds = Dataset( - data_vars={"da": ("coord", data)}, - coords={"coord": coords}, - attrs=global_attrs, - ) - ds.da.attrs = attrs_da - - # deprecated option - with pytest.warns( - FutureWarning, match="Passing ``keep_attrs`` to ``coarsen`` is deprecated" - ): - result = ds.coarsen(dim={"coord": 5}, keep_attrs=False).mean() - - assert result.attrs == {} - assert result.da.attrs == {} - - # the keep_attrs in the reduction function takes precedence - with pytest.warns( - FutureWarning, match="Passing ``keep_attrs`` to ``coarsen`` is deprecated" - ): - result = ds.coarsen(dim={"coord": 5}, keep_attrs=True).mean(keep_attrs=False) - - assert result.attrs == {} - assert result.da.attrs == {} - - -@pytest.mark.slow -@pytest.mark.parametrize("ds", (1, 2), indirect=True) -@pytest.mark.parametrize("window", (1, 2, 3, 4)) -@pytest.mark.parametrize("name", ("sum", "mean", "std", "var", "min", "max", "median")) -def test_coarsen_reduce(ds, window, name): - # Use boundary="trim" to accomodate all window sizes used in tests - coarsen_obj = ds.coarsen(time=window, boundary="trim") - - # add nan prefix to numpy methods to get similar behavior as bottleneck - actual = coarsen_obj.reduce(getattr(np, f"nan{name}")) - expected = getattr(coarsen_obj, name)() - assert_allclose(actual, expected) - - # make sure the order of data_var are not changed. 
- assert list(ds.data_vars.keys()) == list(actual.data_vars.keys()) - - # Make sure the dimension order is restored - for key, src_var in ds.data_vars.items(): - assert src_var.dims == actual[key].dims - - @pytest.mark.parametrize( "funcname, argument", [ From f5f61c1cae8c563f7b9db6d7e94619699023777a Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 16 Jun 2021 10:30:40 -0600 Subject: [PATCH 02/12] Ignore flake8 errors --- setup.cfg | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.cfg b/setup.cfg index b425d1f66e0..daa1de00f34 100644 --- a/setup.cfg +++ b/setup.cfg @@ -151,6 +151,9 @@ ignore = E501 # line too long - let black worry about that E731 # do not assign a lambda expression, use a def W503 # line break before binary operator +per-file-ignores = + tests/*.py:F401 # module imported but unused + tests/*.py:F811 # redefinition of unused name from line N exclude= .eggs doc From b7df9cb34652dfe2b0dba2f4250e99e83dce644b Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 16 Jun 2021 10:36:21 -0600 Subject: [PATCH 03/12] fix --- setup.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index daa1de00f34..b8312a25038 100644 --- a/setup.cfg +++ b/setup.cfg @@ -152,8 +152,8 @@ ignore = E731 # do not assign a lambda expression, use a def W503 # line break before binary operator per-file-ignores = - tests/*.py:F401 # module imported but unused - tests/*.py:F811 # redefinition of unused name from line N + xarray/tests/*.py:F401 + xarray/tests/*.py:F811 exclude= .eggs doc From f47af0519a77498a1c213ce0c5bd933408ecd866 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 16 Jun 2021 10:40:41 -0600 Subject: [PATCH 04/12] moar fix --- setup.cfg | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index b8312a25038..10c5898aa31 100644 --- a/setup.cfg +++ b/setup.cfg @@ -152,8 +152,7 @@ ignore = E731 # do not assign a lambda expression, use a def W503 # line break before binary operator 
per-file-ignores = - xarray/tests/*.py:F401 - xarray/tests/*.py:F811 + xarray/tests/*.py:F401,F811 exclude= .eggs doc From 472b50e68ce25b5020131c8f602cb39a6c0f04e8 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 23 Aug 2020 15:57:49 -0600 Subject: [PATCH 05/12] Add coarsen.construct --- doc/howdoi.rst | 2 +- doc/whats-new.rst | 2 + xarray/core/rolling.py | 75 ++++++++++++++++++++++++++++++++++++ xarray/core/variable.py | 12 +++--- xarray/tests/test_coarsen.py | 47 +++++++++++++++++++++- 5 files changed, 131 insertions(+), 7 deletions(-) diff --git a/doc/howdoi.rst b/doc/howdoi.rst index 9f985b66171..c518b0daba6 100644 --- a/doc/howdoi.rst +++ b/doc/howdoi.rst @@ -24,7 +24,7 @@ How do I ... * - change the order of dimensions - :py:meth:`DataArray.transpose`, :py:meth:`Dataset.transpose` * - reshape dimensions - - :py:meth:`DataArray.stack`, :py:meth:`Dataset.stack` + - :py:meth:`DataArray.stack`, :py:meth:`Dataset.stack`, :py:meth:`Dataset.coarsen.construct`, :py:meth:`DataArray.coarsen.construct` * - remove a variable from my object - :py:meth:`Dataset.drop_vars`, :py:meth:`DataArray.drop_vars` * - remove dimensions of length 1 or 0 diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ff67ea20073..83666249ca5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,6 +21,8 @@ v0.18.3 (unreleased) New Features ~~~~~~~~~~~~ +- Added :py:meth:`Dataset.coarsen.construct`, :py:meth:`DataArray.coarsen.construct` (:issue:`5454`, :pull:`5475`). + By `Deepak Cherian <https://github.com/dcherian>`_. - Allow assigning values to a subset of a dataset using positional or label-based indexing (:issue:`3015`, :pull:`5362`). By `Matthias Göbel <https://github.com/matzegoebel>`_. 
diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 870df122aa9..26942e39c4e 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -1,4 +1,5 @@ import functools +import itertools import warnings from typing import Any, Callable, Dict @@ -845,6 +846,80 @@ def __repr__(self): klass=self.__class__.__name__, attrs=",".join(attrs) ) + def construct( + self, + window_dim=None, + keep_attrs=None, + **window_dim_kwargs, + ): + """ + Convert this Coarsen object to a Dataset, + where the window dimension is reshaped to new dimensions + + Parameters + ---------- + window_dim: str or a mapping, optional + A mapping from dimension name to the new window dimension names. + Just a string can be used for 1d-rolling. + fill_value: optional. Default dtypes.NA + Filling value to match the dimension size. + **window_dim_kwargs : {dim: new_name, ...}, optional + The keyword arguments form of ``window_dim``. + + Returns + ------- + Dataset with variables converted from rolling object. + """ + + from .dataarray import DataArray + from .dataset import Dataset + + if window_dim is None: + if len(window_dim_kwargs) == 0: + raise ValueError( + "Either window_dim or window_dim_kwargs need to be specified." + ) + window_dim = {d: window_dim_kwargs[d] for d in self.dim} + + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=True) + + missing_dims = set(window_dim) - set(self.windows) + if missing_dims: + raise ValueError( + f"'window_dim' must contain entries for all dimensions to coarsen. 
Missing {missing_dims}" + ) + missing_windows = set(self.windows) - set(window_dim) + if missing_windows: + raise ValueError( + f"'window_dim' includes dimensions that will not be coarsened: {missing_windows}" + ) + + reshaped = Dataset() + if isinstance(self.obj, DataArray): + obj = self.obj._to_temp_dataset() + else: + obj = self.obj + + for key, var in obj.variables.items(): + reshaped_dims = tuple( + itertools.chain(*[window_dim.get(dim, [dim]) for dim in list(var.dims)]) + ) + if reshaped_dims != var.dims: + windows = {w: self.windows[w] for w in window_dim if w in var.dims} + reshaped_var, _ = var.coarsen_reshape(windows, self.boundary, self.side) + attrs = var.attrs if keep_attrs else {} + reshaped[key] = (reshaped_dims, reshaped_var, attrs) + else: + reshaped[key] = var + + should_be_coords = set(window_dim) & set(self.obj.coords) + result = reshaped.set_coords(should_be_coords) + if isinstance(self.obj, DataArray): + return self.obj._from_temp_dataset(result) + else: + return result + class DataArrayCoarsen(Coarsen): __slots__ = () diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 2142742101a..5d665270301 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2158,7 +2158,7 @@ def coarsen( if not windows: return self._replace(attrs=_attrs) - reshaped, axes = self._coarsen_reshape(windows, boundary, side) + reshaped, axes = self.coarsen_reshape(windows, boundary, side) if isinstance(func, str): name = func func = getattr(duck_array_ops, name, None) @@ -2167,7 +2167,7 @@ def coarsen( return self._replace(data=func(reshaped, axis=axes, **kwargs), attrs=_attrs) - def _coarsen_reshape(self, windows, boundary, side): + def coarsen_reshape(self, windows, boundary, side): """ Construct a reshaped-array for coarsen """ @@ -2183,7 +2183,9 @@ def _coarsen_reshape(self, windows, boundary, side): for d, window in windows.items(): if window <= 0: - raise ValueError(f"window must be > 0. 
Given {window}") + raise ValueError( + f"window must be > 0. Given {window} for dimension {d}" + ) variable = self for d, window in windows.items(): @@ -2193,8 +2195,8 @@ def _coarsen_reshape(self, windows, boundary, side): if boundary[d] == "exact": if n * window != size: raise ValueError( - "Could not coarsen a dimension of size {} with " - "window {}".format(size, window) + f"Could not coarsen a dimension of size {size} with " + f"window {window} and boundary='exact'. Try a different 'boundary' option." ) elif boundary[d] == "trim": if side[d] == "left": diff --git a/xarray/tests/test_coarsen.py b/xarray/tests/test_coarsen.py index ca4725a579f..d8f1a0d9193 100644 --- a/xarray/tests/test_coarsen.py +++ b/xarray/tests/test_coarsen.py @@ -5,7 +5,13 @@ import xarray as xr from xarray import DataArray, Dataset, set_options -from . import assert_allclose, assert_equal, has_dask, requires_cftime +from . import ( + assert_allclose, + assert_equal, + has_dask, + raise_if_dask_computes, + requires_cftime, +) from .test_dataarray import da from .test_dataset import ds @@ -299,3 +305,42 @@ def test_coarsen_da_reduce(da, window, name): actual = coarsen_obj.reduce(getattr(np, f"nan{name}")) expected = getattr(coarsen_obj, name)() assert_allclose(actual, expected) + + +@pytest.mark.parametrize("dask", [True, False]) +def test_coarsen_construct(dask): + + ds = Dataset( + { + "vart": ("time", np.arange(48)), + "varx": ("x", np.arange(10)), + "vartx": (("x", "time"), np.arange(480).reshape(10, 48)), + "vary": ("y", np.arange(12)), + }, + coords={"time": np.arange(48), "y": np.arange(12)}, + ) + + if dask and has_dask: + ds = ds.chunk({"x": 4, "time": 10}) + with raise_if_dask_computes(): + actual = ds.coarsen(time=12, x=5).construct( + {"time": ("year", "month"), "x": ("x", "x_reshaped")} + ) + + expected = xr.Dataset() + expected["vart"] = (("year", "month"), ds.vart.data.reshape((-1, 12))) + expected["varx"] = (("x", "x_reshaped"), ds.varx.data.reshape((-1, 5))) + 
expected["vartx"] = ( + ("x", "x_reshaped", "year", "month"), + ds.vartx.data.reshape(2, 5, 4, 12), + ) + expected["vary"] = ds.vary + expected.coords["time"] = (("year", "month"), ds.time.data.reshape((-1, 12))) + + assert_equal(actual, expected) + + with raise_if_dask_computes(): + actual = ds.vartx.coarsen(time=12, x=5).construct( + {"time": ("year", "month"), "x": ("x", "x_reshaped")} + ) + assert_equal(actual, expected["vartx"]) From 00a69d549d7eb367190eb5d5497595d1cabd2765 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 16 Jun 2021 15:23:23 -0600 Subject: [PATCH 06/12] Apply suggestions from code review Co-authored-by: Mathias Hauser --- xarray/core/rolling.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 1cd47b6c1ee..8ee629e3164 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -860,7 +860,6 @@ def construct( ---------- window_dim: str or a mapping, optional A mapping from dimension name to the new window dimension names. - Just a string can be used for 1d-rolling. keep_attrs: bool, optional Preserve attributes if True **window_dim_kwargs : {dim: new_name, ...}, optional @@ -889,8 +888,8 @@ def construct( raise ValueError( f"'window_dim' must contain entries for all dimensions to coarsen. 
Missing {missing_dims}" ) - missing_windows = set(self.windows) - set(window_dim) - if missing_windows: + extra_windows = set(self.windows) - set(window_dim) + if extra_windows: raise ValueError( f"'window_dim' includes dimensions that will not be coarsened: {missing_windows}" ) From 723756166e258e7fc2635224d503e35a3567cd26 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 17 Jun 2021 06:59:23 -0600 Subject: [PATCH 07/12] Better docstring; preserve "global" attrs; fix **kwargs form --- xarray/core/rolling.py | 34 ++++++++++++++++++++++++++-------- xarray/tests/test_coarsen.py | 18 +++++++++++++----- 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 1cd47b6c1ee..cd2d999a4c0 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -853,22 +853,38 @@ def construct( **window_dim_kwargs, ): """ - Convert this Coarsen object to a Dataset, - where the window dimension is reshaped to new dimensions + Convert this Coarsen object to a DataArray or Dataset, + where the coarsening dimension is split or reshaped to two + new dimensions. Parameters ---------- - window_dim: str or a mapping, optional - A mapping from dimension name to the new window dimension names. - Just a string can be used for 1d-rolling. + window_dim: mapping + A mapping from existing dimension name to new dimension names. + The size of the second dimension will be the length of the + coarsening window. keep_attrs: bool, optional Preserve attributes if True - **window_dim_kwargs : {dim: new_name, ...}, optional + **window_dim_kwargs : {dim: new_name, ...} The keyword arguments form of ``window_dim``. Returns ------- - Dataset with variables converted from rolling object. 
+ Dataset or DataArray with reshaped dimensions + + Examples + -------- + >>> da = xr.DataArray(np.arange(24), dims="time") + >>> da.coarsen(time=12).construct(time=("year", "month")) + <xarray.DataArray (year: 2, month: 12)> + array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], + [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]]) + Dimensions without coordinates: year, month + + See Also + -------- + DataArrayRolling.construct + DatasetRolling.construct """ from .dataarray import DataArray @@ -879,7 +895,7 @@ def construct( raise ValueError( "Either window_dim or window_dim_kwargs need to be specified." ) - window_dim = {d: window_dim_kwargs[d] for d in self.dim} + window_dim = {d: window_dim_kwargs[d] for d in self.windows} if keep_attrs is None: keep_attrs = _get_keep_attrs(default=True) @@ -901,6 +917,8 @@ def construct( else: obj = self.obj + reshaped.attrs = obj.attrs if keep_attrs else {} + for key, var in obj.variables.items(): reshaped_dims = tuple( itertools.chain(*[window_dim.get(dim, [dim]) for dim in list(var.dims)]) diff --git a/xarray/tests/test_coarsen.py b/xarray/tests/test_coarsen.py index 93b483ef82b..5063d6df5a8 100644 --- a/xarray/tests/test_coarsen.py +++ b/xarray/tests/test_coarsen.py @@ -319,16 +319,13 @@ def test_coarsen_construct(dask): "vary": ("y", np.arange(12)), }, coords={"time": np.arange(48), "y": np.arange(12)}, + attrs={"foo": "bar"}, ) if dask and has_dask: ds = ds.chunk({"x": 4, "time": 10}) - with raise_if_dask_computes(): - actual = ds.coarsen(time=12, x=5).construct( - {"time": ("year", "month"), "x": ("x", "x_reshaped")} - ) - expected = xr.Dataset() + expected = xr.Dataset(attrs={"foo": "bar"}) expected["vart"] = (("year", "month"), ds.vart.data.reshape((-1, 12)), {"a": "b"}) expected["varx"] = (("x", "x_reshaped"), ds.varx.data.reshape((-1, 5)), {"a": "b"}) expected["vartx"] = ( @@ -339,6 +336,16 @@ def test_coarsen_construct(dask): expected["vary"] = ds.vary expected.coords["time"] = (("year", "month"), ds.time.data.reshape((-1, 12))) + with 
raise_if_dask_computes(): + actual = ds.coarsen(time=12, x=5).construct( + {"time": ("year", "month"), "x": ("x", "x_reshaped")} + ) + assert_identical(actual, expected) + + with raise_if_dask_computes(): + actual = ds.coarsen(time=12, x=5).construct( + time=("year", "month"), x=("x", "x_reshaped") + ) assert_identical(actual, expected) with raise_if_dask_computes(): @@ -347,6 +354,7 @@ def test_coarsen_construct(dask): ) for var in actual: assert actual[var].attrs == {} + assert actual.attrs == {} with raise_if_dask_computes(): actual = ds.vartx.coarsen(time=12, x=5).construct( From d8602bcf92490f344d812effaa9d295e57415104 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 17 Jun 2021 08:25:40 -0600 Subject: [PATCH 08/12] Better error checking. --- xarray/core/rolling.py | 24 ++++++++++++++++++------ xarray/tests/test_coarsen.py | 15 +++++++++++++++ 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 29053751b78..b87dcda24b0 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -9,6 +9,7 @@ from .arithmetic import CoarsenArithmetic from .options import _get_keep_attrs from .pycompat import is_duck_dask_array +from .utils import either_dict_or_kwargs try: import bottleneck @@ -890,12 +891,23 @@ def construct( from .dataarray import DataArray from .dataset import Dataset - if window_dim is None: - if len(window_dim_kwargs) == 0: - raise ValueError( - "Either window_dim or window_dim_kwargs need to be specified." - ) - window_dim = {d: window_dim_kwargs[d] for d in self.windows} + window_dim = either_dict_or_kwargs( + window_dim, window_dim_kwargs, "Coarsen.construct" + ) + if not window_dim: + raise ValueError( + "Either window_dim or window_dim_kwargs need to be specified." 
+ ) + + bad_new_dims = tuple( + win + for win, dims in window_dim.items() + if len(dims) != 2 or isinstance(dims, str) + ) + if bad_new_dims: + raise ValueError( + f"Please provide exactly two dimension names for the following coarsening dimensions: {bad_new_dims}" + ) if keep_attrs is None: keep_attrs = _get_keep_attrs(default=True) diff --git a/xarray/tests/test_coarsen.py b/xarray/tests/test_coarsen.py index 5063d6df5a8..dff6957240f 100644 --- a/xarray/tests/test_coarsen.py +++ b/xarray/tests/test_coarsen.py @@ -361,3 +361,18 @@ def test_coarsen_construct(dask): {"time": ("year", "month"), "x": ("x", "x_reshaped")} ) assert_equal(actual, expected["vartx"]) + + with pytest.raises(ValueError): + ds.coarsen(time=12).construct(foo="bar") + + with pytest.raises(ValueError): + ds.coarsen(time=12, x=2).construct(time=("year", "month")) + + with pytest.raises(ValueError): + ds.coarsen(time=12).construct() + + with pytest.raises(ValueError): + ds.coarsen(time=12).construct(time="bar") + + with pytest.raises(ValueError): + ds.coarsen(time=12).construct(time=("bar",)) From a3ba5e2a7c7bc82045293b073bb41f5d4cff0988 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 21 Jun 2021 10:47:56 -0600 Subject: [PATCH 09/12] stricter test --- xarray/tests/test_coarsen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_coarsen.py b/xarray/tests/test_coarsen.py index dff6957240f..503c742252a 100644 --- a/xarray/tests/test_coarsen.py +++ b/xarray/tests/test_coarsen.py @@ -360,7 +360,7 @@ def test_coarsen_construct(dask): actual = ds.vartx.coarsen(time=12, x=5).construct( {"time": ("year", "month"), "x": ("x", "x_reshaped")} ) - assert_equal(actual, expected["vartx"]) + assert_identical(actual, expected["vartx"]) with pytest.raises(ValueError): ds.coarsen(time=12).construct(foo="bar") From e07ea65dd03b51a7b6376776e5cb8b555662bb6c Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 21 Jun 2021 11:08:56 -0600 Subject: [PATCH 10/12] [skip-ci] Fix RTD --- 
doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2f7a991a3af..567c3b28f8c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -724,7 +724,7 @@ Documentation By `Pieter Gijsbers <https://github.com/pgijsbers>`_. - Fix grammar and typos in the :doc:`contributing` guide (:pull:`4545`). By `Sahid Velji <https://github.com/sahidvelji>`_. -- Fix grammar and typos in the :doc:`io` guide (:pull:`4553`). +- Fix grammar and typos in the :doc:`user-guide/io` guide (:pull:`4553`). By `Sahid Velji <https://github.com/sahidvelji>`_. - Update link to NumPy docstring standard in the :doc:`contributing` guide (:pull:`4558`). By `Sahid Velji <https://github.com/sahidvelji>`_. From 7758ee77e74873db5764c3048f20eeb20b4ea46a Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 21 Jun 2021 11:21:41 -0600 Subject: [PATCH 11/12] [skip-ci] more RTD fix --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 567c3b28f8c..44d91d5e796 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -3033,7 +3033,7 @@ Documentation - Added apply_ufunc example to :ref:`/examples/weather-data.ipynb#Toy-weather-data` (:issue:`1844`). By `Liam Brannigan <https://github.com/braaannigan>`_. - New entry `Why don’t aggregations return Python scalars?` in the - :doc:`faq` (:issue:`1726`). + :doc:`getting-started-guide/faq` (:issue:`1726`). By `0x0L <https://github.com/0x0L>`_. 
Enhancements From 4b69c9fca46d8186c81dcacd89e48fbbfe013632 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 21 Jun 2021 17:05:36 -0600 Subject: [PATCH 12/12] [skip-ci] fix doc links --- doc/api-hidden.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 3f936506234..076b0eb452a 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -41,6 +41,7 @@ core.rolling.DatasetCoarsen.all core.rolling.DatasetCoarsen.any + core.rolling.DatasetCoarsen.construct core.rolling.DatasetCoarsen.count core.rolling.DatasetCoarsen.max core.rolling.DatasetCoarsen.mean @@ -185,6 +186,7 @@ core.rolling.DataArrayCoarsen.all core.rolling.DataArrayCoarsen.any + core.rolling.DataArrayCoarsen.construct core.rolling.DataArrayCoarsen.count core.rolling.DataArrayCoarsen.max core.rolling.DataArrayCoarsen.mean