From 2e42c84841c5e671d486857dba24ae0d05aec3ed Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Wed, 12 Jun 2024 11:08:59 +0200 Subject: [PATCH 1/5] Allow duplicate dimensions in chunking --- xarray/namedarray/core.py | 6 +++++- xarray/tests/test_dask.py | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 960ab9d4d1d..494550ccad8 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -812,7 +812,11 @@ def chunk( chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") if is_dict_like(chunks): - chunks = {self.get_axis_num(dim): chunk for dim, chunk in chunks.items()} + _numbered_chunks = {} + for dim_number, dim in enumerate(self.dims): + if dim in chunks: + _numbered_chunks[dim_number] = chunks[dim] + chunks = _numbered_chunks chunkmanager = guess_chunkmanager(chunked_array_type) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 517fc0c2d62..1c3774c8d0e 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -635,6 +635,11 @@ def counting_get(*args, **kwargs): assert count[0] == 1 + def test_duplicate_dims(self): + data = np.random.normal(size=(4, 4)) + arr = DataArray(data, dims=("x", "x")) + arr.chunk({"x": 2}) + def test_stack(self): data = da.random.normal(size=(2, 3, 4), chunks=(1, 3, 4)) arr = DataArray(data, dims=("w", "x", "y")) From af380cff56f215bf9e79fa9cfaae7e1de0092090 Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Thu, 13 Jun 2024 09:18:34 +0200 Subject: [PATCH 2/5] Address review comments --- doc/whats-new.rst | 3 +++ xarray/namedarray/core.py | 10 +++++----- xarray/tests/test_dask.py | 3 ++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0621ec1a64b..8e5a3c189ea 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -50,6 +50,9 @@ Bug fixes support aribtrary kwargs such as ``order`` for polynomial interpolation. (:issue:`8762`). 
By `Nicolas Karasiak `_. +- Allow chunking for arrays with duplicated dimension names (:issue:`8759`, :pull:`9099`). + By `Martin Raspaud `_. + Documentation ~~~~~~~~~~~~~ diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 494550ccad8..464af0fd7c4 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -812,11 +812,11 @@ def chunk( chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") if is_dict_like(chunks): - _numbered_chunks = {} - for dim_number, dim in enumerate(self.dims): - if dim in chunks: - _numbered_chunks[dim_number] = chunks[dim] - chunks = _numbered_chunks + chunks = { + dim_number: chunks[dim] + for dim_number, dim in enumerate(self.dims) + if dim in chunks + } chunkmanager = guess_chunkmanager(chunked_array_type) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 1c3774c8d0e..7ebca31479c 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -638,7 +638,8 @@ def counting_get(*args, **kwargs): def test_duplicate_dims(self): data = np.random.normal(size=(4, 4)) arr = DataArray(data, dims=("x", "x")) - arr.chunk({"x": 2}) + chunked_array = arr.chunk({"x": 2}) + assert chunked_array.chunks == ((2, 2), (2, 2)) def test_stack(self): data = da.random.normal(size=(2, 3, 4), chunks=(1, 3, 4)) From 3853dfd8e7259275a48c6160e46e7ab415f87e6c Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 13 Jun 2024 09:09:31 -0600 Subject: [PATCH 3/5] fix whats-new --- doc/whats-new.rst | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9e748b3bac7..e7a48458ae2 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,7 +22,8 @@ v2024.06.1 (unreleased) New Features ~~~~~~~~~~~~ - +- Allow chunking for arrays with duplicated dimension names (:issue:`8759`, :pull:`9099`). + By `Martin Raspaud `_. 
Breaking changes ~~~~~~~~~~~~~~~~ @@ -73,10 +74,6 @@ Bug fixes support arbitrary kwargs such as ``order`` for polynomial interpolation (:issue:`8762`). By `Nicolas Karasiak `_. -- Allow chunking for arrays with duplicated dimension names (:issue:`8759`, :pull:`9099`). - By `Martin Raspaud `_. - - Documentation ~~~~~~~~~~~~~ - Add link to CF Conventions on packed data and sentence on type determination in the I/O user guide (:issue:`9041`, :pull:`9045`). From c8a5194941e1367423e5bde1bf7affd067cef150 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 13 Jun 2024 09:10:14 -0600 Subject: [PATCH 4/5] add comment --- xarray/namedarray/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 464af0fd7c4..fe47bf50533 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -812,6 +812,7 @@ def chunk( chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") if is_dict_like(chunks): + # This method of iteration allows for duplicated dimension names, GH8759 chunks = { dim_number: chunks[dim] for dim_number, dim in enumerate(self.dims) From 9e671de807a0d7b497267db002c16df165b3174d Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 13 Jun 2024 11:58:35 -0600 Subject: [PATCH 5/5] Update xarray/tests/test_dask.py --- xarray/tests/test_dask.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 7ebca31479c..baaa1d56d99 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -640,6 +640,7 @@ def test_duplicate_dims(self): arr = DataArray(data, dims=("x", "x")) chunked_array = arr.chunk({"x": 2}) assert chunked_array.chunks == ((2, 2), (2, 2)) + assert chunked_array.chunksizes == {"x": (2, 2)} def test_stack(self): data = da.random.normal(size=(2, 3, 4), chunks=(1, 3, 4))