From b9690e02ab8224bbe1a5c88b7df126ddffdfd399 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 25 Nov 2025 12:24:39 -0500 Subject: [PATCH 1/6] fix: 1024 bytes is not 1MiB! --- src/zarr/core/chunk_grids.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/zarr/core/chunk_grids.py b/src/zarr/core/chunk_grids.py index 7ebd68b5b4..e540506f73 100644 --- a/src/zarr/core/chunk_grids.py +++ b/src/zarr/core/chunk_grids.py @@ -62,6 +62,8 @@ def _guess_chunks( tuple[int, ...] """ + if min_bytes >= max_bytes: + raise ValueError(f"Cannot have more min_bytes {min_bytes} than max_bytes {max_bytes}") if isinstance(shape, int): shape = (shape,) @@ -264,7 +266,7 @@ def _auto_partition( else: if chunk_shape == "auto": # aim for a 1MiB chunk - _chunks_out = _guess_chunks(array_shape, item_size, max_bytes=1024) + _chunks_out = _guess_chunks(array_shape, item_size, max_bytes=1049000) else: _chunks_out = chunk_shape From 828674b86225641112bee10640150ae81c177fd0 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 25 Nov 2025 12:27:53 -0500 Subject: [PATCH 2/6] fix: correct number --- src/zarr/core/chunk_grids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/chunk_grids.py b/src/zarr/core/chunk_grids.py index e540506f73..a7467131dc 100644 --- a/src/zarr/core/chunk_grids.py +++ b/src/zarr/core/chunk_grids.py @@ -266,7 +266,7 @@ def _auto_partition( else: if chunk_shape == "auto": # aim for a 1MiB chunk - _chunks_out = _guess_chunks(array_shape, item_size, max_bytes=1049000) + _chunks_out = _guess_chunks(array_shape, item_size, max_bytes=1048576) else: _chunks_out = chunk_shape From 0cd183ab3cbbfccf4627eb9a9d620a7d585fc3c1 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 25 Nov 2025 12:40:43 -0500 Subject: [PATCH 3/6] chore: add test --- tests/test_array.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/test_array.py b/tests/test_array.py index 61828be0aa..67be294827 100644 --- a/tests/test_array.py 
+++ b/tests/test_array.py @@ -1019,6 +1019,24 @@ def test_auto_partition_auto_shards( assert auto_shards == expected_shards +def test_auto_partition_auto_shards_with_auto_chunks_should_be_close_to_1MiB() -> None: + """ + Test that automatically picking a shard size and a chunk size gives roughly 1MiB chunks. + """ + with pytest.warns( + ZarrUserWarning, + match="Automatic shard shape inference is experimental and may change without notice.", + ): + with zarr.config.set({"array.target_shard_size_bytes": 10_000_000}): + _, chunk_shape = _auto_partition( + array_shape=(10_000_000,), + chunk_shape="auto", + shard_shape="auto", + item_size=1, + ) + assert chunk_shape == (625000,) + + def test_chunks_and_shards() -> None: store = StorePath(MemoryStore()) shape = (100, 100) From 6be78c620bc699de3c93600ae2a39bbe94af70b7 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 25 Nov 2025 12:42:16 -0500 Subject: [PATCH 4/6] chore: relnote --- changes/3603.fix.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/3603.fix.md diff --git a/changes/3603.fix.md b/changes/3603.fix.md new file mode 100644 index 0000000000..37e1da5cb1 --- /dev/null +++ b/changes/3603.fix.md @@ -0,0 +1 @@ +Fix the target chunk size used for auto-chunking when auto-sharding: it was 1024 bytes (1 KiB) instead of the intended 1 MiB (1048576 bytes).
\ No newline at end of file From 2b00571402b25d64a22d278235d90e87a260ca97 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 25 Nov 2025 12:45:25 -0500 Subject: [PATCH 5/6] fix: name --- changes/{3603.fix.md => 3603.bugfix.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename changes/{3603.fix.md => 3603.bugfix.md} (100%) diff --git a/changes/3603.fix.md b/changes/3603.bugfix.md similarity index 100% rename from changes/3603.fix.md rename to changes/3603.bugfix.md From 97049b22a41b00cface31491f1167752c3f0356f Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 27 Nov 2025 16:57:18 +0100 Subject: [PATCH 6/6] Apply suggestion from @d-v-b --- src/zarr/core/chunk_grids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/chunk_grids.py b/src/zarr/core/chunk_grids.py index a7467131dc..2c7945fa64 100644 --- a/src/zarr/core/chunk_grids.py +++ b/src/zarr/core/chunk_grids.py @@ -63,7 +63,7 @@ def _guess_chunks( """ if min_bytes >= max_bytes: - raise ValueError(f"Cannot have more min_bytes {min_bytes} than max_bytes {max_bytes}") + raise ValueError(f"Cannot have more min_bytes ({min_bytes}) than max_bytes ({max_bytes})") if isinstance(shape, int): shape = (shape,)