From 959ac7975d2596e8ee38366f8ff932fc2a03b020 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Sat, 25 Oct 2025 00:18:03 +0100
Subject: [PATCH 01/10] Initial tests.

---
 lib/iris/fileformats/cf.py                    |   2 +-
 .../integration/netcdf/test_chararrays.py     | 112 ++++++++++++++++++
 2 files changed, 113 insertions(+), 1 deletion(-)
 create mode 100644 lib/iris/tests/integration/netcdf/test_chararrays.py

diff --git a/lib/iris/fileformats/cf.py b/lib/iris/fileformats/cf.py
index 2b6568c315..b65ab70792 100644
--- a/lib/iris/fileformats/cf.py
+++ b/lib/iris/fileformats/cf.py
@@ -802,7 +802,7 @@ def cf_label_data(self, cf_data_var):
         label_data = self[:]
 
         if ma.isMaskedArray(label_data):
-            label_data = label_data.filled()
+            label_data = label_data.filled(b"\0")
 
         # Determine whether we have a string-valued scalar label
         # i.e. a character variable that only has one dimension (the length of the string).
diff --git a/lib/iris/tests/integration/netcdf/test_chararrays.py b/lib/iris/tests/integration/netcdf/test_chararrays.py
new file mode 100644
index 0000000000..feb93047dd
--- /dev/null
+++ b/lib/iris/tests/integration/netcdf/test_chararrays.py
@@ -0,0 +1,112 @@
+import netCDF4 as nc
+import numpy as np
+import pytest
+
+import iris
+
+NX, N_STRLEN = 3, 64
+TEST_STRINGS = ["Münster", "London", "Amsterdam"]
+TEST_COORD_VALS = ["bun", "éclair", "sandwich"]
+
+
+def convert_chararray(string_array_1d, maxlen, encoding="utf-8"):
+    bbytes = [text.encode(encoding) for text in string_array_1d]
+    pad = b"\0" * maxlen
+    bbytes = [(x + pad)[:maxlen] for x in bbytes]
+    chararray = np.array([[bb[i : i + 1] for i in range(maxlen)] for bb in bbytes])
+    return chararray
+
+
+INCLUDE_COORD = True
+# INCLUDE_COORD = False
+
+
+def make_testfile(filepath, chararray, coordarray, encoding_str=None):
+    with nc.Dataset(filepath, "w") as ds:
+        ds.createDimension("x", NX)
+        ds.createDimension("nstr", N_STRLEN)
+        vx = ds.createVariable("x", int, dimensions=("x"))
+        vx[:] = np.arange(NX)
+        if INCLUDE_COORD:
+            ds.createDimension("nstr2", N_STRLEN)
+            v_co = ds.createVariable(
+                "v_co",
+                "S1",
+                dimensions=(
+                    "x",
+                    "nstr2",
+                ),
+            )
+            v_co[:] = coordarray
+            if encoding_str is not None:
+                v_co._Encoding = encoding_str
+        v = ds.createVariable(
+            "v",
+            "S1",
+            dimensions=(
+                "x",
+                "nstr",
+            ),
+        )
+        v[:] = chararray
+        if encoding_str is not None:
+            v._Encoding = encoding_str
+        if INCLUDE_COORD:
+            v.coordinates = "v_co"
+
+
+def show_result(filepath):
+    from pp_utils import ncdump
+
+    print(f"File {filepath}")
+    print("NCDUMP:")
+    ncdump(filepath, "")
+    # with nc.Dataset(filepath, "r") as ds:
+    #     v = ds.variables["v"]
+    #     print("\n----\nNetcdf data readback (basic)")
+    #     try:
+    #         print(repr(v[:]))
+    #     except UnicodeDecodeError as err:
+    #         print(repr(err))
+    #     print("..raw:")
+    #     v.set_auto_chartostring(False)
+    #     print(repr(v[:]))
+    print("\nAs iris cube..")
+    try:
+        cube = iris.load_cube(filepath)
+        print(cube)
+        if iris.loading.LOAD_PROBLEMS._problems:
+            print(iris.loading.LOAD_PROBLEMS)
+            print(
+                "\n".join(iris.loading.LOAD_PROBLEMS._problems[0].stack_trace.format())
+            )
+        print("-data-")
+        print(repr(cube.data))
+        if INCLUDE_COORD:
+            print("-coord data-")
+            try:
+                print(repr(cube.coord("v_co").points))
+            except Exception as err2:
+                print(repr(err2))
+    except UnicodeDecodeError as err:
+        print(repr(err))
+
+
+# tsts = (None, "ascii", "utf-8", "utf-32",)
+# tsts = ("utf-8",)
+# tsts = ("utf-8", "utf-32",)
+# tsts = ("utf-32",)
+tsts = ("utf-8", "ascii", "utf-8")
+
+
+@pytest.mark.parametrize("encoding", tsts)
+def test_encodings(encoding):
+    print(f"\n=========\nTesting encoding: {encoding}")
+    filepath = f"tmp_{str(encoding)}.nc"
+    do_as = encoding
+    if encoding != "utf-32":
+        do_as = "utf-8"
+    TEST_CHARARRAY = convert_chararray(TEST_STRINGS, N_STRLEN, encoding=do_as)
+    TEST_COORDARRAY = convert_chararray(TEST_COORD_VALS, N_STRLEN, encoding=do_as)
+    make_testfile(filepath, TEST_CHARARRAY, TEST_COORDARRAY, encoding_str=encoding)
+    show_result(filepath)

From 814932eba65a43716acb161103a611b5010c77ed Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Sat, 25 Oct 2025 01:22:30 +0100
Subject: [PATCH 02/10] Get 'create_cf_data_variable' to call
 'create_generic_cf_array_var': Mostly working?

Get 'create_cf_data_variable' to call 'create_generic_cf_array_var': Mostly working?
---
 .../fileformats/_nc_load_rules/helpers.py     |   8 +-
 lib/iris/fileformats/netcdf/saver.py          | 158 +++++++++---------
 .../integration/netcdf/test_chararrays.py     |   1 +
 3 files changed, 85 insertions(+), 82 deletions(-)

diff --git a/lib/iris/fileformats/_nc_load_rules/helpers.py b/lib/iris/fileformats/_nc_load_rules/helpers.py
index 35c2e96924..50e282db5f 100644
--- a/lib/iris/fileformats/_nc_load_rules/helpers.py
+++ b/lib/iris/fileformats/_nc_load_rules/helpers.py
@@ -708,13 +708,13 @@ def build_and_add_global_attributes(engine: Engine):
             ),
         )
         if problem is not None:
-            stack_notes = problem.stack_trace.__notes__
+            stack_notes = problem.stack_trace.__notes__  # type: ignore[attr-defined]
             if stack_notes is None:
                 stack_notes = []
             stack_notes.append(
                 f"Skipping disallowed global attribute '{attr_name}' (see above error)"
             )
-            problem.stack_trace.__notes__ = stack_notes
+            problem.stack_trace.__notes__ = stack_notes  # type: ignore[attr-defined]
 
 
 ################################################################################
@@ -1536,14 +1536,14 @@ def build_and_add_dimension_coordinate(
     )
     if problem is not None:
         coord_var_name = str(cf_coord_var.cf_name)
-        stack_notes = problem.stack_trace.__notes__
+        stack_notes = problem.stack_trace.__notes__  # type: ignore[attr-defined]
         if stack_notes is None:
             stack_notes = []
         stack_notes.append(
             f"Failed to create {coord_var_name} dimension coordinate:\n"
             f"Gracefully creating {coord_var_name!r} auxiliary coordinate instead."
         )
-        problem.stack_trace.__notes__ = stack_notes
+        problem.stack_trace.__notes__ = stack_notes  # type: ignore[attr-defined]
         problem.handled = True
 
         _ = _add_or_capture(
diff --git a/lib/iris/fileformats/netcdf/saver.py b/lib/iris/fileformats/netcdf/saver.py
index 5177749c07..bd4e87471f 100644
--- a/lib/iris/fileformats/netcdf/saver.py
+++ b/lib/iris/fileformats/netcdf/saver.py
@@ -759,7 +759,7 @@ def _create_cf_dimensions(self, cube, dimension_names, unlimited_dimensions=None
                     # used for a different one
                     pass
                 else:
-                    dim_name = self._get_coord_variable_name(cube, coord)
+                    dim_name = self._get_element_variable_name(cube, coord)
                     unlimited_dim_names.append(dim_name)
 
         for dim_name in dimension_names:
@@ -990,12 +990,12 @@ def _add_aux_coords(
         ]
 
         # Include any relevant mesh location coordinates.
-        mesh: MeshXY | None = getattr(cube, "mesh")
-        mesh_location: str | None = getattr(cube, "location")
+        mesh: MeshXY | None = getattr(cube, "mesh")  # type: ignore[annotation-unchecked]
+        mesh_location: str | None = getattr(cube, "location")  # type: ignore[annotation-unchecked]
         if mesh and mesh_location:
             location_coords: MeshNodeCoords | MeshEdgeCoords | MeshFaceCoords = getattr(
                 mesh, f"{mesh_location}_coords"
-            )
+            )  # type: ignore[annotation-unchecked]
             coords_to_add.extend(list(location_coords))
 
         return self._add_inner_related_vars(
@@ -1365,7 +1365,7 @@ def record_dimension(names_list, dim_name, length, matching_coords=None):
                         if dim_name is None:
                             # Not already present : create  a unique dimension name
                             # from the coord.
-                            dim_name = self._get_coord_variable_name(cube, coord)
+                            dim_name = self._get_element_variable_name(cube, coord)
                             # Disambiguate if it has the same name as an
                             # existing dimension.
                             # OR if it matches an existing file variable name.
@@ -1541,38 +1541,14 @@ def _create_cf_bounds(self, coord, cf_var, cf_name, /, *, compression_kwargs=Non
             )
             self._lazy_stream_data(data=bounds, cf_var=cf_var_bounds)
 
-    def _get_cube_variable_name(self, cube):
-        """Return a CF-netCDF variable name for the given cube.
-
-        Parameters
-        ----------
-        cube : :class:`iris.cube.Cube`
-            An instance of a cube for which a CF-netCDF variable
-            name is required.
-
-        Returns
-        -------
-        str
-            A CF-netCDF variable name as a string.
-
-        """
-        if cube.var_name is not None:
-            cf_name = cube.var_name
-        else:
-            # Convert to lower case and replace whitespace by underscores.
-            cf_name = "_".join(cube.name().lower().split())
-
-        cf_name = self.cf_valid_var_name(cf_name)
-        return cf_name
-
-    def _get_coord_variable_name(self, cube_or_mesh, coord):
-        """Return a CF-netCDF variable name for a given coordinate-like element.
+    def _get_element_variable_name(self, cube_or_mesh, element):
+        """Return a CF-netCDF variable name for a given coordinate-like element, or cube.
 
         Parameters
         ----------
         cube_or_mesh : :class:`iris.cube.Cube` or :class:`iris.mesh.MeshXY`
             The Cube or Mesh being saved to the netCDF file.
-        coord : :class:`iris.coords._DimensionalMetadata`
+        element : :class:`iris.coords._DimensionalMetadata` | :class:``iris.cube.Cube``
             An instance of a coordinate (or similar), for which a CF-netCDF
             variable name is required.
 
@@ -1592,17 +1568,21 @@ def _get_coord_variable_name(self, cube_or_mesh, coord):
             cube = None
             mesh = cube_or_mesh
 
-        if coord.var_name is not None:
-            cf_name = coord.var_name
+        if element.var_name is not None:
+            cf_name = element.var_name
+        elif isinstance(element, Cube):
+            # Make name for a Cube without a var_name.
+            cf_name = "_".join(element.name().lower().split())
         else:
-            name = coord.standard_name or coord.long_name
+            # Make name for a Coord-like element without a var_name
+            name = element.standard_name or element.long_name
             if not name or set(name).intersection(string.whitespace):
                 # We need to invent a name, based on its associated dimensions.
-                if cube is not None and cube.coords(coord):
-                    # It is a regular cube coordinate.
+                if cube is not None and cube.elements(element):
+                    # It is a regular cube elementinate.
                     # Auto-generate a name based on the dims.
                     name = ""
-                    for dim in cube.coord_dims(coord):
+                    for dim in cube.coord_dims(element):
                         name += f"dim{dim}"
                     # Handle scalar coordinate (dims == ()).
                     if not name:
@@ -1616,8 +1596,8 @@ def _get_coord_variable_name(self, cube_or_mesh, coord):
 
                     # At present, a location-coord cannot be nameless, as the
                     # MeshXY code relies on guess_coord_axis.
-                    assert isinstance(coord, Connectivity)
-                    location = coord.cf_role.split("_")[0]
+                    assert isinstance(element, Connectivity)
+                    location = element.cf_role.split("_")[0]
                     location_dim_attr = f"{location}_dimension"
                     name = getattr(mesh, location_dim_attr)
 
@@ -1693,6 +1673,8 @@ def _create_mesh(self, mesh):
         return cf_mesh_name
 
     def _set_cf_var_attributes(self, cf_var, element):
+        from iris.cube import Cube
+
         # Deal with CF-netCDF units, and add the name+units properties.
         if isinstance(element, iris.coords.Coord):
             # Fix "degree" units if needed.
@@ -1715,19 +1697,21 @@ def _set_cf_var_attributes(self, cf_var, element):
         if element.units.calendar:
             _setncattr(cf_var, "calendar", str(element.units.calendar))
 
-        # Add any other custom coordinate attributes.
-        for name in sorted(element.attributes):
-            value = element.attributes[name]
+        if not isinstance(element, Cube):
+            # Add any other custom coordinate attributes.
+            # N.B. not Cube, which has specific handling in  _create_cf_data_variable
+            for name in sorted(element.attributes):
+                value = element.attributes[name]
 
-            if name == "STASH":
-                # Adopting provisional Metadata Conventions for representing MO
-                # Scientific Data encoded in NetCDF Format.
-                name = "um_stash_source"
-                value = str(value)
+                if name == "STASH":
+                    # Adopting provisional Metadata Conventions for representing MO
+                    # Scientific Data encoded in NetCDF Format.
+                    name = "um_stash_source"
+                    value = str(value)
 
-            # Don't clobber existing attributes.
-            if not hasattr(cf_var, name):
-                _setncattr(cf_var, name, value)
+                # Don't clobber existing attributes.
+                if not hasattr(cf_var, name):
+                    _setncattr(cf_var, name, value)
 
     def _create_generic_cf_array_var(
         self,
@@ -1739,6 +1723,7 @@ def _create_generic_cf_array_var(
         element_dims=None,
         fill_value=None,
         compression_kwargs=None,
+        is_dataless=False,
     ):
         """Create theCF-netCDF variable given dimensional_metadata.
 
@@ -1791,7 +1776,7 @@ def _create_generic_cf_array_var(
 
         # Work out the var-name to use.
         # N.B. the only part of this routine that may use a mesh _or_ a cube.
-        cf_name = self._get_coord_variable_name(cube_or_mesh, element)
+        cf_name = self._get_element_variable_name(cube_or_mesh, element)
         while cf_name in self._dataset.variables:
             cf_name = self._increment_name(cf_name)
 
@@ -1804,10 +1789,13 @@ def _create_generic_cf_array_var(
         # Get the data values, in a way which works for any element type, as
         # all are subclasses of _DimensionalMetadata.
         # (e.g. =points if a coord, =data if an ancillary, etc)
-        data = element._core_values()
+        if isinstance(element, Cube):
+            data = element.core_data()
+        else:
+            data = element._core_values()
 
         # This compression contract is *not* applicable to a mesh.
-        if cube and cube.shape != data.shape:
+        if cube is not None and data is not None and cube.shape != data.shape:
             compression_kwargs = {}
 
         if np.issubdtype(data.dtype, np.str_):
@@ -1837,11 +1825,13 @@ def _create_generic_cf_array_var(
             # Convert data from an array of strings into a character array
             # with an extra string-length dimension.
             if len(element_dims) == 1:
+                # Scalar variable (only has string dimension).
                 data_first = data[0]
                 if is_lazy_data(data_first):
                     data_first = dask.compute(data_first)
                 data = list("%- *s" % (string_dimension_depth, data_first))
             else:
+                # NOTE: at present, can't do this lazily??
                 orig_shape = data.shape
                 new_shape = orig_shape + (string_dimension_depth,)
                 new_data = np.zeros(new_shape, cf_var.dtype)
@@ -1850,7 +1840,7 @@ def _create_generic_cf_array_var(
                     new_data[index_slice] = list(
                         "%- *s" % (string_dimension_depth, data[index])
                     )
-                data = new_data
+                    data = new_data
         else:
             # A normal (numeric) variable.
             # ensure a valid datatype for the file format.
@@ -1887,7 +1877,8 @@ def _create_generic_cf_array_var(
             )
 
         # Add the data to the CF-netCDF variable.
-        self._lazy_stream_data(data=data, cf_var=cf_var)
+        if not is_dataless:
+            self._lazy_stream_data(data=data, cf_var=cf_var)
 
         # Add names + units
         self._set_cf_var_attributes(cf_var, element)
@@ -2238,9 +2229,9 @@ def _create_cf_grid_mapping(self, cube, cf_var_cube):
                     cfvar = self._name_coord_map.name(coord)
                     if not cfvar:
                         # not found - create and store it:
-                        cfvar = self._get_coord_variable_name(cube, coord)
+                        cfvar = self._get_element_variable_name(cube, coord)
                         self._name_coord_map.append(
-                            cfvar, self._get_coord_variable_name(cube, coord)
+                            cfvar, self._get_element_variable_name(cube, coord)
                         )
                     cfvar_names.append(cfvar)
 
@@ -2383,32 +2374,43 @@ def set_packing_ncattrs(cfvar):
                 if add_offset:
                     _setncattr(cfvar, "add_offset", add_offset)
 
-        cf_name = self._get_cube_variable_name(cube)
-        while cf_name in self._dataset.variables:
-            cf_name = self._increment_name(cf_name)
-
+        # cf_name = self._get_element_variable_name(cube_or_mesh=None, element=cube)
+        # while cf_name in self._dataset.variables:
+        #     cf_name = self._increment_name(cf_name)
+        #
+        # cf_var = self._dataset.createVariable(
+        #     cf_name, dtype, dimension_names, fill_value=fill_value, **kwargs
+        # )
         # Create the cube CF-netCDF data variable with data payload.
-        cf_var = self._dataset.createVariable(
-            cf_name, dtype, dimension_names, fill_value=fill_value, **kwargs
+        cf_name = self._create_generic_cf_array_var(
+            cube,
+            dimension_names,
+            cube,
+            element_dims=dimension_names,
+            fill_value=fill_value,
+            compression_kwargs=kwargs,
+            is_dataless=is_dataless,
         )
+        cf_var = self._dataset.variables[cf_name]
 
         if not is_dataless:
             set_packing_ncattrs(cf_var)
-            self._lazy_stream_data(data=data, cf_var=cf_var)
-
-        if cube.standard_name:
-            _setncattr(cf_var, "standard_name", cube.standard_name)
-
-        if cube.long_name:
-            _setncattr(cf_var, "long_name", cube.long_name)
-
-        if cube.units.is_udunits():
-            _setncattr(cf_var, "units", str(cube.units))
-
-        # Add the CF-netCDF calendar attribute.
-        if cube.units.calendar:
-            _setncattr(cf_var, "calendar", cube.units.calendar)
 
+        # if cube.standard_name:
+        #     _setncattr(cf_var, "standard_name", cube.standard_name)
+        #
+        # if cube.long_name:
+        #     _setncattr(cf_var, "long_name", cube.long_name)
+        #
+        # if cube.units.is_udunits():
+        #     _setncattr(cf_var, "units", str(cube.units))
+        #
+        # # Add the CF-netCDF calendar attribute.
+        # if cube.units.calendar:
+        #     _setncattr(cf_var, "calendar", cube.units.calendar)
+
+        # Set attributes: NB this part is cube-specific (not the same for components)
+        # - therefore 'set_cf_var_attributes' doesn't set attributes if element is a Cube
         if iris.FUTURE.save_split_attrs:
             attr_names = cube.attributes.locals.keys()
         else:
diff --git a/lib/iris/tests/integration/netcdf/test_chararrays.py b/lib/iris/tests/integration/netcdf/test_chararrays.py
index feb93047dd..a3ce9f9128 100644
--- a/lib/iris/tests/integration/netcdf/test_chararrays.py
+++ b/lib/iris/tests/integration/netcdf/test_chararrays.py
@@ -101,6 +101,7 @@ def show_result(filepath):
 
 @pytest.mark.parametrize("encoding", tsts)
 def test_encodings(encoding):
+    # small change
     print(f"\n=========\nTesting encoding: {encoding}")
     filepath = f"tmp_{str(encoding)}.nc"
     do_as = encoding

From 24c299f36bd8ba4a4dbc8792e7406affad87bab1 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Tue, 28 Oct 2025 21:11:15 +0000
Subject: [PATCH 03/10] Reinstate decode on load, now in-Iris coded.

---
 .../fileformats/_nc_load_rules/helpers.py     | 10 ++-
 lib/iris/fileformats/cf.py                    | 18 +++++-
 .../fileformats/netcdf/_thread_safe_nc.py     | 45 +++++++++++--
 lib/iris/fileformats/netcdf/loader.py         | 38 ++++++++++-
 lib/iris/fileformats/netcdf/saver.py          |  4 +-
 .../integration/netcdf/test_chararrays.py     | 64 ++++++++++++++++++-
 lib/iris/util.py                              | 21 ++++++
 7 files changed, 184 insertions(+), 16 deletions(-)

diff --git a/lib/iris/fileformats/_nc_load_rules/helpers.py b/lib/iris/fileformats/_nc_load_rules/helpers.py
index 50e282db5f..fa63002f09 100644
--- a/lib/iris/fileformats/_nc_load_rules/helpers.py
+++ b/lib/iris/fileformats/_nc_load_rules/helpers.py
@@ -1643,9 +1643,13 @@ def _add_auxiliary_coordinate(
 
     # Determine the name of the dimension/s shared between the CF-netCDF data variable
     # and the coordinate being built.
-    common_dims = [
-        dim for dim in cf_coord_var.dimensions if dim in engine.cf_var.dimensions
-    ]
+    coord_dims = cf_coord_var.dimensions
+    if cf._is_str_dtype(cf_coord_var):
+        coord_dims = coord_dims[:-1]
+    datavar_dims = engine.cf_var.dimensions
+    if cf._is_str_dtype(engine.cf_var):
+        datavar_dims = datavar_dims[:-1]
+    common_dims = [dim for dim in coord_dims if dim in datavar_dims]
     data_dims = None
     if common_dims:
         # Calculate the offset of each common dimension.
diff --git a/lib/iris/fileformats/cf.py b/lib/iris/fileformats/cf.py
index b65ab70792..5abc525109 100644
--- a/lib/iris/fileformats/cf.py
+++ b/lib/iris/fileformats/cf.py
@@ -790,15 +790,27 @@ def cf_label_data(self, cf_data_var):
 
         # Determine the name of the label string (or length) dimension by
         # finding the dimension name that doesn't exist within the data dimensions.
-        str_dim_name = list(set(self.dimensions) - set(cf_data_var.dimensions))
+        str_dim_names = list(set(self.dimensions) - set(cf_data_var.dimensions))
+        n_nondata_dims = len(str_dim_names)
+
+        if n_nondata_dims == 0:
+            # *All* dims are shared with the data-variable.
+            # This is only ok if the data-var is *also* a string type.
+            dim_ok = _is_str_dtype(cf_data_var)
+            # In this case, we must just *assume* that the last dimension is "the"
+            #  string dimension
+            str_dim_name = self.dimensions[-1]
+        else:
+            # If there is exactly one non-data dim, that is the one we want
+            dim_ok = len(str_dim_names) == 1
+            (str_dim_name,) = str_dim_names
 
-        if len(str_dim_name) != 1:
+        if not dim_ok:
             raise ValueError(
                 "Invalid string dimensions for CF-netCDF label variable %r"
                 % self.cf_name
             )
 
-        str_dim_name = str_dim_name[0]
         label_data = self[:]
 
         if ma.isMaskedArray(label_data):
diff --git a/lib/iris/fileformats/netcdf/_thread_safe_nc.py b/lib/iris/fileformats/netcdf/_thread_safe_nc.py
index 35588eb2c4..9e18be4395 100644
--- a/lib/iris/fileformats/netcdf/_thread_safe_nc.py
+++ b/lib/iris/fileformats/netcdf/_thread_safe_nc.py
@@ -310,14 +310,39 @@ def fromcdl(cls, *args, **kwargs):
 class NetCDFDataProxy:
     """A reference to the data payload of a single NetCDF file variable."""
 
-    __slots__ = ("shape", "dtype", "path", "variable_name", "fill_value")
-
-    def __init__(self, shape, dtype, path, variable_name, fill_value):
+    __slots__ = (
+        "shape",
+        "dtype",
+        "path",
+        "variable_name",
+        "fill_value",
+        "is_bytes",
+        "encoding",
+        "string_length",
+    )
+
+    def __init__(
+        self,
+        shape,
+        dtype,
+        path,
+        variable_name,
+        fill_value,
+        encoding: str | None = None,
+        string_length: int = 0,
+    ):
         self.shape = shape
         self.dtype = dtype
         self.path = path
         self.variable_name = variable_name
         self.fill_value = fill_value
+        self.is_bytes = dtype.kind == "S" and dtype.itemsize == 1
+        if self.is_bytes:
+            # We will be returning a different shape : the last dim is the byte-length
+            self.shape = self.shape[:-1]
+            self.dtype = np.dtype(f"U{string_length}")
+        self.encoding = encoding
+        self.string_length = string_length
 
     @property
     def ndim(self):
@@ -337,10 +362,20 @@ def __getitem__(self, keys):
             try:
                 variable = dataset.variables[self.variable_name]
                 # Get the NetCDF variable data and slice.
-                var = variable[keys]
+                data = variable[keys]
+
+                # If bytes, decode to strings
+                if self.is_bytes:
+                    from iris.util import convert_bytesarray_to_strings
+
+                    data = convert_bytesarray_to_strings(
+                        data,
+                        encoding=self.encoding,
+                        string_length=self.string_length,
+                    )
             finally:
                 dataset.close()
-        return np.asanyarray(var)
+        return np.asanyarray(data)
 
     def __repr__(self):
         fmt = (
diff --git a/lib/iris/fileformats/netcdf/loader.py b/lib/iris/fileformats/netcdf/loader.py
index e8d283beb8..6c0b599a4d 100644
--- a/lib/iris/fileformats/netcdf/loader.py
+++ b/lib/iris/fileformats/netcdf/loader.py
@@ -11,6 +11,7 @@
 
 """
 
+import codecs
 from collections.abc import Iterable, Iterator, Mapping
 from contextlib import contextmanager
 from copy import deepcopy
@@ -269,10 +270,36 @@ def _get_cf_var_data(cf_var):
             # Normal NCVariable type:
             total_bytes = cf_var.size * cf_var.dtype.itemsize
 
+        default_encoding = "utf-8"
+        encoding = getattr(cf_var, "_Encoding", None)
+        if encoding is None:
+            # utf-8 is a reasonable "safe" default, equivalent to 'ascii' for ascii data
+            encoding = default_encoding
+        else:
+            try:
+                # Accept + normalise naming of encodings
+                encoding = codecs.lookup(encoding).name
+                # NOTE: if encoding does not suit data, errors can occur.
+                # For example, _Encoding = "ascii", with non-ascii content.
+            except LookupError:
+                # Replace some invalid setting with "safe"(ish) fallback.
+                encoding = default_encoding
+
+        string_length = getattr(cf_var, "iris_string_length", None)
+
         if total_bytes < _LAZYVAR_MIN_BYTES:
             # Don't make a lazy array, as it will cost more memory AND more time to access.
             result = cf_var[:]
 
+            if result.dtype.kind == "S":
+                from iris.util import convert_bytesarray_to_strings
+
+                result = convert_bytesarray_to_strings(
+                    result,
+                    encoding=encoding,
+                    string_length=string_length,
+                )
+
             # Special handling of masked scalar value; this will be returned as
             # an `np.ma.masked` instance which will lose the original dtype.
             # Workaround for this it return a 1-element masked array of the
@@ -295,8 +322,17 @@ def _get_cf_var_data(cf_var):
                 "_FillValue",
                 _thread_safe_nc.default_fillvals[fill_dtype],
             )
+
+            # NOTE: if the data is bytes which need to be converted to strings on read,
+            #  the data-proxy will do that (and it modifies its shape + dtype).
             proxy = NetCDFDataProxy(
-                cf_var.shape, dtype, cf_var.filename, cf_var.cf_name, fill_value
+                cf_var.shape,
+                dtype,
+                cf_var.filename,
+                cf_var.cf_name,
+                fill_value,
+                encoding=encoding,
+                string_length=string_length,
             )
             # Get the chunking specified for the variable : this is either a shape, or
             # maybe the string "contiguous".
diff --git a/lib/iris/fileformats/netcdf/saver.py b/lib/iris/fileformats/netcdf/saver.py
index bd4e87471f..d885387a7f 100644
--- a/lib/iris/fileformats/netcdf/saver.py
+++ b/lib/iris/fileformats/netcdf/saver.py
@@ -1578,8 +1578,8 @@ def _get_element_variable_name(self, cube_or_mesh, element):
             name = element.standard_name or element.long_name
             if not name or set(name).intersection(string.whitespace):
                 # We need to invent a name, based on its associated dimensions.
-                if cube is not None and cube.elements(element):
-                    # It is a regular cube elementinate.
+                if cube is not None and cube.coords(element):
+                    # It is a regular cube coordinate.
                     # Auto-generate a name based on the dims.
                     name = ""
                     for dim in cube.coord_dims(element):
diff --git a/lib/iris/tests/integration/netcdf/test_chararrays.py b/lib/iris/tests/integration/netcdf/test_chararrays.py
index a3ce9f9128..8f29fcdcd5 100644
--- a/lib/iris/tests/integration/netcdf/test_chararrays.py
+++ b/lib/iris/tests/integration/netcdf/test_chararrays.py
@@ -4,10 +4,18 @@
 
 import iris
 
+iris.FUTURE.save_split_attrs = True
+
+
 NX, N_STRLEN = 3, 64
 TEST_STRINGS = ["Münster", "London", "Amsterdam"]
 TEST_COORD_VALS = ["bun", "éclair", "sandwich"]
 
+# VARS_COORDS_SHARE_STRING_DIM = True
+VARS_COORDS_SHARE_STRING_DIM = False
+if VARS_COORDS_SHARE_STRING_DIM:
+    TEST_COORD_VALS[-1] = "Xsandwich"  # makes the max coord strlen same as data one
+
 
 def convert_chararray(string_array_1d, maxlen, encoding="utf-8"):
     bbytes = [text.encode(encoding) for text in string_array_1d]
@@ -17,9 +25,33 @@ def convert_chararray(string_array_1d, maxlen, encoding="utf-8"):
     return chararray
 
 
+def convert_bytesarray_to_strings(
+    byte_array, encoding="utf-8", string_length: int | None = None
+):
+    """Convert bytes to strings.
+
+    N.B. for now at least, we assume the string dim is **always the last one**.
+    """
+    bytes_shape = byte_array.shape
+    var_shape = bytes_shape[:-1]
+    if string_length is None:
+        string_length = bytes_shape[-1]
+    string_dtype = f"U{string_length}"
+    result = np.empty(var_shape, dtype=string_dtype)
+    for ndindex in np.ndindex(var_shape):
+        element_bytes = byte_array[ndindex]
+        bytes = b"".join([b if b else b"\0" for b in element_bytes])
+        string = bytes.decode(encoding)
+        result[ndindex] = string
+    return result
+
+
 INCLUDE_COORD = True
 # INCLUDE_COORD = False
 
+INCLUDE_NUMERIC_AUXCOORD = True
+# INCLUDE_NUMERIC_AUXCOORD = False
+
 
 def make_testfile(filepath, chararray, coordarray, encoding_str=None):
     with nc.Dataset(filepath, "w") as ds:
@@ -40,6 +72,13 @@ def make_testfile(filepath, chararray, coordarray, encoding_str=None):
             v_co[:] = coordarray
             if encoding_str is not None:
                 v_co._Encoding = encoding_str
+            if INCLUDE_NUMERIC_AUXCOORD:
+                v_num = ds.createVariable(
+                    "v_num",
+                    float,
+                    dimensions=("x",),
+                )
+                v_num[:] = np.arange(NX)
         v = ds.createVariable(
             "v",
             "S1",
@@ -52,7 +91,10 @@ def make_testfile(filepath, chararray, coordarray, encoding_str=None):
         if encoding_str is not None:
             v._Encoding = encoding_str
         if INCLUDE_COORD:
-            v.coordinates = "v_co"
+            coords_str = "v_co"
+            if INCLUDE_NUMERIC_AUXCOORD:
+                coords_str += " v_num"
+            v.coordinates = coords_str
 
 
 def show_result(filepath):
@@ -82,8 +124,10 @@ def show_result(filepath):
             )
         print("-data-")
         print(repr(cube.data))
+        print("-numeric auxcoord data-")
+        print(repr(cube.coord("x").points))
         if INCLUDE_COORD:
-            print("-coord data-")
+            print("-string auxcoord data-")
             try:
                 print(repr(cube.coord("v_co").points))
             except Exception as err2:
@@ -111,3 +155,19 @@ def test_encodings(encoding):
     TEST_COORDARRAY = convert_chararray(TEST_COORD_VALS, N_STRLEN, encoding=do_as)
     make_testfile(filepath, TEST_CHARARRAY, TEST_COORDARRAY, encoding_str=encoding)
     show_result(filepath)
+
+
+# @pytest.mark.parametrize("ndim", [1, 2])
+# def test_convert_bytes_to_strings(ndim: int):
+#     if ndim == 1:
+#         source = convert_strings_to_chararray(TEST_STRINGS, 16)
+#     elif ndim == 2:
+#         source = np.stack([
+#             convert_strings_to_chararray(TEST_STRINGS, 16),
+#             convert_strings_to_chararray(TEST_COORD_VALS, 16),
+#         ])
+#     else:
+#         raise ValueError(f"Unexpected param ndim={ndim}.")
+#     # convert the strings to bytes
+#     result = convert_bytesarray_to_strings(source)
+#     print(result)
diff --git a/lib/iris/util.py b/lib/iris/util.py
index b3ce7941c5..d154d42e16 100644
--- a/lib/iris/util.py
+++ b/lib/iris/util.py
@@ -2999,3 +2999,24 @@ def set(
 
 # Global CML settings object for use as context manager
 CML_SETTINGS: CMLSettings = CMLSettings()
+
+
+def convert_bytesarray_to_strings(
+    byte_array, encoding="utf-8", string_length: int | None = None
+):
+    """Convert bytes to strings.
+
+    N.B. for now at least, we assume the string dim is **always the last one**.
+    """
+    bytes_shape = byte_array.shape
+    var_shape = bytes_shape[:-1]
+    if string_length is None:
+        string_length = bytes_shape[-1]
+    string_dtype = f"U{string_length}"
+    result = np.empty(var_shape, dtype=string_dtype)
+    for ndindex in np.ndindex(var_shape):
+        element_bytes = byte_array[ndindex]
+        bytes = b"".join([b if b else b"\0" for b in element_bytes])
+        string = bytes.decode(encoding)
+        result[ndindex] = string
+    return result

From 29142dc80434e01662751f386d6f7cae7c8c57e6 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Sun, 7 Dec 2025 00:34:53 +0000
Subject: [PATCH 04/10] Revert and amend.

---
 .../fileformats/netcdf/_thread_safe_nc.py     | 45 +++----------------
 lib/iris/fileformats/netcdf/loader.py         | 38 +---------------
 lib/iris/fileformats/netcdf/saver.py          |  4 +-
 lib/iris/util.py                              | 21 ---------
 4 files changed, 8 insertions(+), 100 deletions(-)

diff --git a/lib/iris/fileformats/netcdf/_thread_safe_nc.py b/lib/iris/fileformats/netcdf/_thread_safe_nc.py
index 9e18be4395..35588eb2c4 100644
--- a/lib/iris/fileformats/netcdf/_thread_safe_nc.py
+++ b/lib/iris/fileformats/netcdf/_thread_safe_nc.py
@@ -310,39 +310,14 @@ def fromcdl(cls, *args, **kwargs):
 class NetCDFDataProxy:
     """A reference to the data payload of a single NetCDF file variable."""
 
-    __slots__ = (
-        "shape",
-        "dtype",
-        "path",
-        "variable_name",
-        "fill_value",
-        "is_bytes",
-        "encoding",
-        "string_length",
-    )
-
-    def __init__(
-        self,
-        shape,
-        dtype,
-        path,
-        variable_name,
-        fill_value,
-        encoding: str | None = None,
-        string_length: int = 0,
-    ):
+    __slots__ = ("shape", "dtype", "path", "variable_name", "fill_value")
+
+    def __init__(self, shape, dtype, path, variable_name, fill_value):
         self.shape = shape
         self.dtype = dtype
         self.path = path
         self.variable_name = variable_name
         self.fill_value = fill_value
-        self.is_bytes = dtype.kind == "S" and dtype.itemsize == 1
-        if self.is_bytes:
-            # We will be returning a different shape : the last dim is the byte-length
-            self.shape = self.shape[:-1]
-            self.dtype = np.dtype(f"U{string_length}")
-        self.encoding = encoding
-        self.string_length = string_length
 
     @property
     def ndim(self):
@@ -362,20 +337,10 @@ def __getitem__(self, keys):
             try:
                 variable = dataset.variables[self.variable_name]
                 # Get the NetCDF variable data and slice.
-                data = variable[keys]
-
-                # If bytes, decode to strings
-                if self.is_bytes:
-                    from iris.util import convert_bytesarray_to_strings
-
-                    data = convert_bytesarray_to_strings(
-                        data,
-                        encoding=self.encoding,
-                        string_length=self.string_length,
-                    )
+                var = variable[keys]
             finally:
                 dataset.close()
-        return np.asanyarray(data)
+        return np.asanyarray(var)
 
     def __repr__(self):
         fmt = (
diff --git a/lib/iris/fileformats/netcdf/loader.py b/lib/iris/fileformats/netcdf/loader.py
index 6c0b599a4d..e8d283beb8 100644
--- a/lib/iris/fileformats/netcdf/loader.py
+++ b/lib/iris/fileformats/netcdf/loader.py
@@ -11,7 +11,6 @@
 
 """
 
-import codecs
 from collections.abc import Iterable, Iterator, Mapping
 from contextlib import contextmanager
 from copy import deepcopy
@@ -270,36 +269,10 @@ def _get_cf_var_data(cf_var):
             # Normal NCVariable type:
             total_bytes = cf_var.size * cf_var.dtype.itemsize
 
-        default_encoding = "utf-8"
-        encoding = getattr(cf_var, "_Encoding", None)
-        if encoding is None:
-            # utf-8 is a reasonable "safe" default, equivalent to 'ascii' for ascii data
-            encoding = default_encoding
-        else:
-            try:
-                # Accept + normalise naming of encodings
-                encoding = codecs.lookup(encoding).name
-                # NOTE: if encoding does not suit data, errors can occur.
-                # For example, _Encoding = "ascii", with non-ascii content.
-            except LookupError:
-                # Replace some invalid setting with "safe"(ish) fallback.
-                encoding = default_encoding
-
-        string_length = getattr(cf_var, "iris_string_length", None)
-
         if total_bytes < _LAZYVAR_MIN_BYTES:
             # Don't make a lazy array, as it will cost more memory AND more time to access.
             result = cf_var[:]
 
-            if result.dtype.kind == "S":
-                from iris.util import convert_bytesarray_to_strings
-
-                result = convert_bytesarray_to_strings(
-                    result,
-                    encoding=encoding,
-                    string_length=string_length,
-                )
-
             # Special handling of masked scalar value; this will be returned as
             # an `np.ma.masked` instance which will lose the original dtype.
             # Workaround for this it return a 1-element masked array of the
@@ -322,17 +295,8 @@ def _get_cf_var_data(cf_var):
                 "_FillValue",
                 _thread_safe_nc.default_fillvals[fill_dtype],
             )
-
-            # NOTE: if the data is bytes which need to be converted to strings on read,
-            #  the data-proxy will do that (and it modifies its shape + dtype).
             proxy = NetCDFDataProxy(
-                cf_var.shape,
-                dtype,
-                cf_var.filename,
-                cf_var.cf_name,
-                fill_value,
-                encoding=encoding,
-                string_length=string_length,
+                cf_var.shape, dtype, cf_var.filename, cf_var.cf_name, fill_value
             )
             # Get the chunking specified for the variable : this is either a shape, or
             # maybe the string "contiguous".
diff --git a/lib/iris/fileformats/netcdf/saver.py b/lib/iris/fileformats/netcdf/saver.py
index d885387a7f..bd4e87471f 100644
--- a/lib/iris/fileformats/netcdf/saver.py
+++ b/lib/iris/fileformats/netcdf/saver.py
@@ -1578,8 +1578,8 @@ def _get_element_variable_name(self, cube_or_mesh, element):
             name = element.standard_name or element.long_name
             if not name or set(name).intersection(string.whitespace):
                 # We need to invent a name, based on its associated dimensions.
-                if cube is not None and cube.coords(element):
-                    # It is a regular cube coordinate.
+                if cube is not None and cube.elements(element):
+                    # It is a regular cube elementinate.
                     # Auto-generate a name based on the dims.
                     name = ""
                     for dim in cube.coord_dims(element):
diff --git a/lib/iris/util.py b/lib/iris/util.py
index d154d42e16..b3ce7941c5 100644
--- a/lib/iris/util.py
+++ b/lib/iris/util.py
@@ -2999,24 +2999,3 @@ def set(
 
 # Global CML settings object for use as context manager
 CML_SETTINGS: CMLSettings = CMLSettings()
-
-
-def convert_bytesarray_to_strings(
-    byte_array, encoding="utf-8", string_length: int | None = None
-):
-    """Convert bytes to strings.
-
-    N.B. for now at least, we assume the string dim is **always the last one**.
-    """
-    bytes_shape = byte_array.shape
-    var_shape = bytes_shape[:-1]
-    if string_length is None:
-        string_length = bytes_shape[-1]
-    string_dtype = f"U{string_length}"
-    result = np.empty(var_shape, dtype=string_dtype)
-    for ndindex in np.ndindex(var_shape):
-        element_bytes = byte_array[ndindex]
-        bytes = b"".join([b if b else b"\0" for b in element_bytes])
-        string = bytes.decode(encoding)
-        result[ndindex] = string
-    return result

From 1e10dc8a164424a7cf4e4bf22f3df94641eb6bff Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Wed, 29 Oct 2025 12:23:07 +0000
Subject: [PATCH 05/10] Hack to preserve the existing order of attributes on
 saved Coords and Cubes.

---
 lib/iris/fileformats/netcdf/saver.py | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/lib/iris/fileformats/netcdf/saver.py b/lib/iris/fileformats/netcdf/saver.py
index bd4e87471f..8fb0fec377 100644
--- a/lib/iris/fileformats/netcdf/saver.py
+++ b/lib/iris/fileformats/netcdf/saver.py
@@ -1682,12 +1682,26 @@ def _set_cf_var_attributes(self, cf_var, element):
         else:
             units_str = str(element.units)
 
-        if cf_units.as_unit(units_str).is_udunits():
-            _setncattr(cf_var, "units", units_str)
+        # NB this bit is a nasty hack to preserve existing behaviour through a refactor:
+        #  The attributes for Coords are created in the order units, standard_name,
+        #   whereas for data-variables (aka Cubes) it is the other way around.
+        # Needed now that this routine is also called from _create_cf_data_variable.
+        # TODO: when we can break things, rationalise these to be the same.
+        def add_units():
+            if cf_units.as_unit(units_str).is_udunits():
+                _setncattr(cf_var, "units", units_str)
+
+        def add_stdname():
+            standard_name = element.standard_name
+            if standard_name is not None:
+                _setncattr(cf_var, "standard_name", standard_name)
 
-        standard_name = element.standard_name
-        if standard_name is not None:
-            _setncattr(cf_var, "standard_name", standard_name)
+        if isinstance(element, Cube):
+            add_stdname()
+            add_units()
+        else:
+            add_units()
+            add_stdname()
 
         long_name = element.long_name
         if long_name is not None:

From d853a800ac7b546a3db71a2e348cee0ac15ed8c5 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Wed, 29 Oct 2025 14:54:33 +0000
Subject: [PATCH 06/10] Fix for dataless; avoid FUTURE global state change from
 temporary tests.

---
 lib/iris/fileformats/netcdf/saver.py          | 30 ++++----
 .../integration/netcdf/test_chararrays.py     | 72 ++++++++++++++++---
 2 files changed, 75 insertions(+), 27 deletions(-)

diff --git a/lib/iris/fileformats/netcdf/saver.py b/lib/iris/fileformats/netcdf/saver.py
index 8fb0fec377..c2522d8867 100644
--- a/lib/iris/fileformats/netcdf/saver.py
+++ b/lib/iris/fileformats/netcdf/saver.py
@@ -1812,7 +1812,7 @@ def _create_generic_cf_array_var(
         if cube is not None and data is not None and cube.shape != data.shape:
             compression_kwargs = {}
 
-        if np.issubdtype(data.dtype, np.str_):
+        if not is_dataless and np.issubdtype(data.dtype, np.str_):
             # Deal with string-type variables.
             # Typically CF label variables, but also possibly ancil-vars ?
             string_dimension_depth = data.dtype.itemsize
@@ -1858,8 +1858,13 @@ def _create_generic_cf_array_var(
         else:
             # A normal (numeric) variable.
             # ensure a valid datatype for the file format.
-            element_type = type(element).__name__
-            data = self._ensure_valid_dtype(data, element_type, element)
+            if is_dataless:
+                dtype = self._DATALESS_DTYPE
+                fill_value = self._DATALESS_FILLVALUE
+            else:
+                element_type = type(element).__name__
+                data = self._ensure_valid_dtype(data, element_type, element)
+                dtype = data.dtype.newbyteorder("=")
 
             # Check if this is a dim-coord.
             is_dimcoord = cube is not None and element in cube.dim_coords
@@ -1873,7 +1878,7 @@ def _create_generic_cf_array_var(
             # Create the CF-netCDF variable.
             cf_var = self._dataset.createVariable(
                 cf_name,
-                data.dtype.newbyteorder("="),
+                dtype,
                 element_dims,
                 fill_value=fill_value,
                 **compression_kwargs,
@@ -2325,19 +2330,12 @@ def _create_cf_data_variable(
         # be removed.
         # Get the values in a form which is valid for the file format.
         is_dataless = cube.is_dataless()
-        if is_dataless:
-            data = None
-        else:
-            data = self._ensure_valid_dtype(cube.core_data(), "cube", cube)
 
-        if is_dataless:
-            # The variable must have *some* dtype, and it must be maskable
-            dtype = self._DATALESS_DTYPE
-            fill_value = self._DATALESS_FILLVALUE
-        elif not packing:
-            dtype = data.dtype.newbyteorder("=")
-        else:
-            if isinstance(packing, dict):
+        if not is_dataless:
+            data = self._ensure_valid_dtype(cube.core_data(), "cube", cube)
+            if not packing:
+                dtype = data.dtype.newbyteorder("=")
+            elif isinstance(packing, dict):
                 if "dtype" not in packing:
                     msg = "The dtype attribute is required for packing."
                     raise ValueError(msg)
diff --git a/lib/iris/tests/integration/netcdf/test_chararrays.py b/lib/iris/tests/integration/netcdf/test_chararrays.py
index 8f29fcdcd5..c8bba94671 100644
--- a/lib/iris/tests/integration/netcdf/test_chararrays.py
+++ b/lib/iris/tests/integration/netcdf/test_chararrays.py
@@ -3,9 +3,8 @@
 import pytest
 
 import iris
-
-iris.FUTURE.save_split_attrs = True
-
+from iris.coords import AuxCoord, DimCoord
+from iris.cube import Cube
 
 NX, N_STRLEN = 3, 64
 TEST_STRINGS = ["Münster", "London", "Amsterdam"]
@@ -17,7 +16,13 @@
     TEST_COORD_VALS[-1] = "Xsandwich"  # makes the max coord strlen same as data one
 
 
-def convert_chararray(string_array_1d, maxlen, encoding="utf-8"):
+@pytest.fixture(scope="module", autouse=True)
+def enable_split_attrs():
+    with iris.FUTURE.context(save_split_attrs=True):
+        yield
+
+
+def convert_strings_to_chararray(string_array_1d, maxlen, encoding="utf-8"):
     bbytes = [text.encode(encoding) for text in string_array_1d]
     pad = b"\0" * maxlen
     bbytes = [(x + pad)[:maxlen] for x in bbytes]
@@ -97,6 +102,23 @@ def make_testfile(filepath, chararray, coordarray, encoding_str=None):
             v.coordinates = coords_str
 
 
+def make_testcube(
+    dataarray,
+    coordarray,  # for now, these are always *string* arrays
+    encoding_str: str | None = None,
+):
+    cube = Cube(dataarray, var_name="v")
+    cube.add_dim_coord(DimCoord(np.arange(NX), var_name="x"), 0)
+    if encoding_str is not None:
+        cube.attributes["_Encoding"] = encoding_str
+    if INCLUDE_COORD:
+        co_x = AuxCoord(coordarray, var_name="v_co")
+        if encoding_str is not None:
+            co_x.attributes["_Encoding"] = encoding_str
+        cube.add_aux_coord(co_x, 0)
+    return cube
+
+
 def show_result(filepath):
     from pp_utils import ncdump
 
@@ -115,12 +137,13 @@ def show_result(filepath):
     #     print(repr(v[:]))
     print("\nAs iris cube..")
     try:
+        iris.loading.LOAD_PROBLEMS.reset()
         cube = iris.load_cube(filepath)
         print(cube)
-        if iris.loading.LOAD_PROBLEMS._problems:
+        if iris.loading.LOAD_PROBLEMS.problems:
             print(iris.loading.LOAD_PROBLEMS)
             print(
-                "\n".join(iris.loading.LOAD_PROBLEMS._problems[0].stack_trace.format())
+                "\n".join(iris.loading.LOAD_PROBLEMS.problems[0].stack_trace.format())
             )
         print("-data-")
         print(repr(cube.data))
@@ -136,27 +159,54 @@ def show_result(filepath):
         print(repr(err))
 
 
-# tsts = (None, "ascii", "utf-8", "utf-32",)
+tsts = (
+    None,
+    "ascii",
+    "utf-8",
+    "utf-32",
+)
 # tsts = ("utf-8",)
 # tsts = ("utf-8", "utf-32",)
 # tsts = ("utf-32",)
-tsts = ("utf-8", "ascii", "utf-8")
+# tsts = ("utf-8", "ascii", "utf-8")
 
 
 @pytest.mark.parametrize("encoding", tsts)
-def test_encodings(encoding):
+def test_load_encodings(encoding):
     # small change
     print(f"\n=========\nTesting encoding: {encoding}")
     filepath = f"tmp_{str(encoding)}.nc"
     do_as = encoding
     if encoding != "utf-32":
         do_as = "utf-8"
-    TEST_CHARARRAY = convert_chararray(TEST_STRINGS, N_STRLEN, encoding=do_as)
-    TEST_COORDARRAY = convert_chararray(TEST_COORD_VALS, N_STRLEN, encoding=do_as)
+    TEST_CHARARRAY = convert_strings_to_chararray(
+        TEST_STRINGS, N_STRLEN, encoding=do_as
+    )
+    TEST_COORDARRAY = convert_strings_to_chararray(
+        TEST_COORD_VALS, N_STRLEN, encoding=do_as
+    )
     make_testfile(filepath, TEST_CHARARRAY, TEST_COORDARRAY, encoding_str=encoding)
     show_result(filepath)
 
 
+@pytest.mark.parametrize("encoding", tsts)
+def test_save_encodings(encoding):
+    cube = make_testcube(
+        dataarray=TEST_STRINGS, coordarray=TEST_COORD_VALS, encoding_str=encoding
+    )
+    print(cube)
+    filepath = f"tmp_save_{str(encoding)}.nc"
+    if encoding == "ascii":
+        with pytest.raises(
+            UnicodeEncodeError,
+            match="'ascii' codec can't encode character.*not in range",
+        ):
+            iris.save(cube, filepath)
+    else:
+        iris.save(cube, filepath)
+        show_result(filepath)
+
+
 # @pytest.mark.parametrize("ndim", [1, 2])
 # def test_convert_bytes_to_strings(ndim: int):
 #     if ndim == 1:

From 1bcfb19d6d6990e7aeb8ff372d311b847577cc45 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Wed, 29 Oct 2025 15:21:31 +0000
Subject: [PATCH 07/10] Further fix to attribute ordering.

---
 lib/iris/fileformats/netcdf/saver.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/lib/iris/fileformats/netcdf/saver.py b/lib/iris/fileformats/netcdf/saver.py
index c2522d8867..f80cf154c3 100644
--- a/lib/iris/fileformats/netcdf/saver.py
+++ b/lib/iris/fileformats/netcdf/saver.py
@@ -1687,25 +1687,25 @@ def _set_cf_var_attributes(self, cf_var, element):
         #   whereas for data-variables (aka Cubes) it is the other way around.
         # Needed now that this routine is also called from _create_cf_data_variable.
         # TODO: when we can break things, rationalise these to be the same.
-        def add_units():
+        def add_units_attr():
             if cf_units.as_unit(units_str).is_udunits():
                 _setncattr(cf_var, "units", units_str)
 
-        def add_stdname():
+        def add_names_attrs():
             standard_name = element.standard_name
             if standard_name is not None:
                 _setncattr(cf_var, "standard_name", standard_name)
 
+            long_name = element.long_name
+            if long_name is not None:
+                _setncattr(cf_var, "long_name", long_name)
+
         if isinstance(element, Cube):
-            add_stdname()
-            add_units()
+            add_names_attrs()
+            add_units_attr()
         else:
-            add_units()
-            add_stdname()
-
-        long_name = element.long_name
-        if long_name is not None:
-            _setncattr(cf_var, "long_name", long_name)
+            add_units_attr()
+            add_names_attrs()
 
         # Add the CF-netCDF calendar attribute.
         if element.units.calendar:

From ad4960e86a16609ba5b8e0a77e8a0c2537fe7a52 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Wed, 29 Oct 2025 18:08:35 +0000
Subject: [PATCH 08/10] Fixes for data packing.

---
 lib/iris/fileformats/netcdf/saver.py | 64 ++++++++++------------------
 1 file changed, 22 insertions(+), 42 deletions(-)

diff --git a/lib/iris/fileformats/netcdf/saver.py b/lib/iris/fileformats/netcdf/saver.py
index f80cf154c3..8d66557cab 100644
--- a/lib/iris/fileformats/netcdf/saver.py
+++ b/lib/iris/fileformats/netcdf/saver.py
@@ -1737,6 +1737,7 @@ def _create_generic_cf_array_var(
         element_dims=None,
         fill_value=None,
         compression_kwargs=None,
+        packing_controls: dict | None = None,
         is_dataless=False,
     ):
         """Create theCF-netCDF variable given dimensional_metadata.
@@ -1864,7 +1865,10 @@ def _create_generic_cf_array_var(
             else:
                 element_type = type(element).__name__
                 data = self._ensure_valid_dtype(data, element_type, element)
-                dtype = data.dtype.newbyteorder("=")
+                if not packing_controls:
+                    dtype = data.dtype.newbyteorder("=")
+                else:
+                    dtype = packing_controls["dtype"]
 
             # Check if this is a dim-coord.
             is_dimcoord = cube is not None and element in cube.dim_coords
@@ -1897,6 +1901,10 @@ def _create_generic_cf_array_var(
 
         # Add the data to the CF-netCDF variable.
         if not is_dataless:
+            if packing_controls:
+                # We must set packing attributes (if any), before assigning values.
+                for key, value in packing_controls["attributes"]:
+                    _setncattr(cf_var, key, value)
             self._lazy_stream_data(data=data, cf_var=cf_var)
 
         # Add names + units
@@ -2331,11 +2339,10 @@ def _create_cf_data_variable(
         # Get the values in a form which is valid for the file format.
         is_dataless = cube.is_dataless()
 
-        if not is_dataless:
+        packing_controls = None
+        if packing and not is_dataless:
             data = self._ensure_valid_dtype(cube.core_data(), "cube", cube)
-            if not packing:
-                dtype = data.dtype.newbyteorder("=")
-            elif isinstance(packing, dict):
+            if isinstance(packing, dict):
                 if "dtype" not in packing:
                     msg = "The dtype attribute is required for packing."
                     raise ValueError(msg)
@@ -2373,26 +2380,14 @@ def _create_cf_data_variable(
                     else:
                         add_offset = cmin + 2 ** (n - 1) * scale_factor
 
-        def set_packing_ncattrs(cfvar):
-            """Set netCDF packing attributes.
-
-            NOTE: cfvar needs to be a _thread_safe_nc._ThreadSafeWrapper subclass.
+            packing_controls = {
+                "dtype": dtype,
+                "attributes": [
+                    ("scale_factor", scale_factor),
+                    ("add_offset", add_offset),
+                ],
+            }
 
-            """
-            assert hasattr(cfvar, "THREAD_SAFE_FLAG")
-            if packing:
-                if scale_factor:
-                    _setncattr(cfvar, "scale_factor", scale_factor)
-                if add_offset:
-                    _setncattr(cfvar, "add_offset", add_offset)
-
-        # cf_name = self._get_element_variable_name(cube_or_mesh=None, element=cube)
-        # while cf_name in self._dataset.variables:
-        #     cf_name = self._increment_name(cf_name)
-        #
-        # cf_var = self._dataset.createVariable(
-        #     cf_name, dtype, dimension_names, fill_value=fill_value, **kwargs
-        # )
         # Create the cube CF-netCDF data variable with data payload.
         cf_name = self._create_generic_cf_array_var(
             cube,
@@ -2401,28 +2396,13 @@ def set_packing_ncattrs(cfvar):
             element_dims=dimension_names,
             fill_value=fill_value,
             compression_kwargs=kwargs,
+            packing_controls=packing_controls,
             is_dataless=is_dataless,
         )
         cf_var = self._dataset.variables[cf_name]
 
-        if not is_dataless:
-            set_packing_ncattrs(cf_var)
-
-        # if cube.standard_name:
-        #     _setncattr(cf_var, "standard_name", cube.standard_name)
-        #
-        # if cube.long_name:
-        #     _setncattr(cf_var, "long_name", cube.long_name)
-        #
-        # if cube.units.is_udunits():
-        #     _setncattr(cf_var, "units", str(cube.units))
-        #
-        # # Add the CF-netCDF calendar attribute.
-        # if cube.units.calendar:
-        #     _setncattr(cf_var, "calendar", cube.units.calendar)
-
-        # Set attributes: NB this part is cube-specific (not the same for components)
-        # - therefore 'set_cf_var_attributes' doesn't set attributes if element is a Cube
+        # Set general attrs: NB this part is cube-specific (not the same for components)
+        # - so 'set_cf_var_attributes' *doesn't* set these, if element is a Cube
         if iris.FUTURE.save_split_attrs:
             attr_names = cube.attributes.locals.keys()
         else:

From ca5872a7a10da9df9258d29ca17f4692ffeb319f Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Sun, 7 Dec 2025 00:42:34 +0000
Subject: [PATCH 09/10] Latest test-chararrays.

---
 .../integration/netcdf/test_chararrays.py     | 61 +++++++++++--------
 1 file changed, 36 insertions(+), 25 deletions(-)

diff --git a/lib/iris/tests/integration/netcdf/test_chararrays.py b/lib/iris/tests/integration/netcdf/test_chararrays.py
index c8bba94671..0eb211c8b0 100644
--- a/lib/iris/tests/integration/netcdf/test_chararrays.py
+++ b/lib/iris/tests/integration/netcdf/test_chararrays.py
@@ -1,10 +1,19 @@
-import netCDF4 as nc
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the BSD license.
+# See LICENSE in the root of the repository for full licensing details.
+"""Integration tests for string data handling."""
+
+import subprocess
+
 import numpy as np
 import pytest
 
 import iris
 from iris.coords import AuxCoord, DimCoord
 from iris.cube import Cube
+from iris.fileformats.netcdf import _thread_safe_nc
+from iris.tests import env_bin_path
 
 NX, N_STRLEN = 3, 64
 TEST_STRINGS = ["Münster", "London", "Amsterdam"]
@@ -16,6 +25,7 @@
     TEST_COORD_VALS[-1] = "Xsandwich"  # makes the max coord strlen same as data one
 
 
+# Ensure all tests run with "split attrs" turned on.
 @pytest.fixture(scope="module", autouse=True)
 def enable_split_attrs():
     with iris.FUTURE.context(save_split_attrs=True):
@@ -59,7 +69,8 @@ def convert_bytesarray_to_strings(
 
 
 def make_testfile(filepath, chararray, coordarray, encoding_str=None):
-    with nc.Dataset(filepath, "w") as ds:
+    ds = _thread_safe_nc.DatasetWrapper(filepath, "w")
+    try:
         ds.createDimension("x", NX)
         ds.createDimension("nstr", N_STRLEN)
         vx = ds.createVariable("x", int, dimensions=("x"))
@@ -100,6 +111,8 @@ def make_testfile(filepath, chararray, coordarray, encoding_str=None):
             if INCLUDE_NUMERIC_AUXCOORD:
                 coords_str += " v_num"
             v.coordinates = coords_str
+    finally:
+        ds.close()
 
 
 def make_testcube(
@@ -119,12 +132,19 @@ def make_testcube(
     return cube
 
 
-def show_result(filepath):
-    from pp_utils import ncdump
+NCDUMP_PATHSTR = str(env_bin_path("ncdump"))
+
+
+def ncdump(nc_path: str, *args):
+    """Call ncdump to print a dump of a file."""
+    call_args = [NCDUMP_PATHSTR, nc_path] + list(*args)
+    subprocess.run(call_args, check=True)
 
+
+def show_result(filepath):
     print(f"File {filepath}")
     print("NCDUMP:")
-    ncdump(filepath, "")
+    ncdump(filepath)
     # with nc.Dataset(filepath, "r") as ds:
     #     v = ds.variables["v"]
     #     print("\n----\nNetcdf data readback (basic)")
@@ -159,6 +179,13 @@ def show_result(filepath):
         print(repr(err))
 
 
+@pytest.fixture(scope="session")
+def save_dir(tmp_path_factory):
+    return tmp_path_factory.mktemp("save_files")
+
+
+# TODO: the tests don't test things properly yet, they just exercise the code and print
+#  things for manual debugging.
 tsts = (
     None,
     "ascii",
@@ -172,10 +199,10 @@ def show_result(filepath):
 
 
 @pytest.mark.parametrize("encoding", tsts)
-def test_load_encodings(encoding):
+def test_load_encodings(encoding, save_dir):
     # small change
     print(f"\n=========\nTesting encoding: {encoding}")
-    filepath = f"tmp_{str(encoding)}.nc"
+    filepath = save_dir / f"tmp_load_{str(encoding)}.nc"
     do_as = encoding
     if encoding != "utf-32":
         do_as = "utf-8"
@@ -190,12 +217,12 @@ def test_load_encodings(encoding):
 
 
 @pytest.mark.parametrize("encoding", tsts)
-def test_save_encodings(encoding):
+def test_save_encodings(encoding, save_dir):
     cube = make_testcube(
         dataarray=TEST_STRINGS, coordarray=TEST_COORD_VALS, encoding_str=encoding
     )
     print(cube)
-    filepath = f"tmp_save_{str(encoding)}.nc"
+    filepath = save_dir / f"tmp_save_{str(encoding)}.nc"
     if encoding == "ascii":
         with pytest.raises(
             UnicodeEncodeError,
@@ -205,19 +232,3 @@ def test_save_encodings(encoding):
     else:
         iris.save(cube, filepath)
         show_result(filepath)
-
-
-# @pytest.mark.parametrize("ndim", [1, 2])
-# def test_convert_bytes_to_strings(ndim: int):
-#     if ndim == 1:
-#         source = convert_strings_to_chararray(TEST_STRINGS, 16)
-#     elif ndim == 2:
-#         source = np.stack([
-#             convert_strings_to_chararray(TEST_STRINGS, 16),
-#             convert_strings_to_chararray(TEST_COORD_VALS, 16),
-#         ])
-#     else:
-#         raise ValueError(f"Unexpected param ndim={ndim}.")
-#     # convert the strings to bytes
-#     result = convert_bytesarray_to_strings(source)
-#     print(result)

From a8d022e4613e8f87b6fbb9f22dd0695dcefa63ee Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Sun, 7 Dec 2025 10:43:18 +0000
Subject: [PATCH 10/10] Fix search+replace error.

---
 lib/iris/fileformats/netcdf/saver.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/iris/fileformats/netcdf/saver.py b/lib/iris/fileformats/netcdf/saver.py
index 8d66557cab..4766054142 100644
--- a/lib/iris/fileformats/netcdf/saver.py
+++ b/lib/iris/fileformats/netcdf/saver.py
@@ -1578,8 +1578,8 @@ def _get_element_variable_name(self, cube_or_mesh, element):
             name = element.standard_name or element.long_name
             if not name or set(name).intersection(string.whitespace):
                 # We need to invent a name, based on its associated dimensions.
-                if cube is not None and cube.elements(element):
-                    # It is a regular cube elementinate.
+                if cube is not None and cube.coords(element):
+                    # It is a regular cube coordinate.
                     # Auto-generate a name based on the dims.
                     name = ""
                     for dim in cube.coord_dims(element):