zarr-developers · TomAugspurger · Sep 18, 2024 · TomAugspurger · Sep 18, 2024 · jhamman
diff --git a/src/zarr/abc/metadata.py b/src/zarr/abc/metadata.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import base64
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
@@ -29,6 +30,8 @@ def to_dict(self) -> dict[str, JSON]:
             value = getattr(self, key)
             if isinstance(value, Metadata):
                 out_dict[field.name] = getattr(self, field.name).to_dict()
+            elif isinstance(value, bytes):
+                out_dict[key] = base64.b64encode(value).decode("ascii")  # JSON needs str
             elif isinstance(value, str):
                 out_dict[key] = value
             elif isinstance(value, Sequence):

diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py
@@ -313,7 +313,7 @@ def parse_fill_value(
     """
     if fill_value is None:
         return dtype.type(0)
-    if isinstance(fill_value, Sequence) and not isinstance(fill_value, str):
+    if isinstance(fill_value, Sequence) and not isinstance(fill_value, str | bytes):
         if dtype in (np.complex64, np.complex128):
             dtype = cast(COMPLEX_DTYPE, dtype)
             if len(fill_value) == 2:

diff --git a/tests/v3/test_metadata/test_v3.py b/tests/v3/test_metadata/test_v3.py
@@ -1,11 +1,13 @@
 from __future__ import annotations
 
+import dataclasses
 import json
 import re
 from typing import TYPE_CHECKING, Literal
 
 from zarr.codecs.bytes import BytesCodec
 from zarr.core.buffer import default_buffer_prototype
+from zarr.core.chunk_grids import RegularChunkGrid
 from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding
 from zarr.core.metadata.v3 import ArrayV3Metadata
 
@@ -165,6 +167,31 @@ def test_parse_fill_value_invalid_type_sequence(fill_value: Any, dtype_str: str)
         parse_fill_value(fill_value, dtype)
 
 
+def test_parse_fill_value_bytes() -> None:
+    result = parse_fill_value("", dtype=np.dtype("S6"))  # str input, bytes dtype
+    assert result == np.bytes_(b"")
+
+
+@pytest.mark.parametrize("fill_value", [None, np.bytes_(b"")])
+def test_fill_value_bytes(fill_value: Any) -> None:
+    """An "S6" array keeps an empty-bytes fill value, whether given or defaulted from None."""
+    metadata = ArrayV3Metadata(
+        shape=(4,),
+        data_type=np.dtype("S6"),
+        fill_value=fill_value,
+        chunk_grid=RegularChunkGrid(chunk_shape=(2,)),
+        chunk_key_encoding=DefaultChunkKeyEncoding(),
+        codecs=(),
+        attributes={},
+        dimension_names=("a",),
+    )
+    assert metadata.dtype == np.dtype("S6")
+    assert metadata.fill_value == np.bytes_(b"")
+    # regression guard: rebuilding the metadata from its own field values must work
+    dataclasses.replace(metadata)
+    assert metadata.to_dict()
+
+
 @pytest.mark.parametrize("chunk_grid", ["regular"])
 @pytest.mark.parametrize("attributes", [None, {"foo": "bar"}])
 @pytest.mark.parametrize("codecs", [[BytesCodec()]])