Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow data to be omitted from netCDF files during cfdm.write #222

Merged
merged 12 commits into from
Oct 31, 2022
2 changes: 2 additions & 0 deletions Changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ Version 1.10.0.1
(https://github.com/NCAS-CMS/cfdm/issues/215)
* New method: `cfdm.Field.get_original_filenames`
* New method: `cfdm.Data.get_original_filenames`
* New keyword parameter to `cfdm.write`: ``omit_data``
(https://github.com/NCAS-CMS/cfdm/issues/221)
* Fixed bug that caused incorrect data assignment with some multiple
list indices (https://github.com/NCAS-CMS/cfdm/issues/217)
* Fixed bug that caused a failure when printing date-time data with
Expand Down
19 changes: 19 additions & 0 deletions cfdm/cfdmimplementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,25 @@ def get_construct_data_axes(self, field, key):
except KeyError:
return None

def get_construct_type(self, variable):
    """Return the construct type of a variable.

    The type is read from the variable's ``construct_type``
    attribute.

    .. versionadded:: (cfdm) 1.10.0.1

    :Parameters:

        variable: object
            The object whose construct type is required.

    :Returns:

        `str` or `None`
            The value of the ``construct_type`` attribute, or
            `None` if the variable has no such attribute (i.e. it
            is not a construct).

    """
    try:
        return variable.construct_type
    except AttributeError:
        # Not a construct
        return None

def get_constructs(self, field, axes=(), data=False):
"""Return constructs that span particular axes.

Expand Down
1 change: 0 additions & 1 deletion cfdm/examplefield.py
Original file line number Diff line number Diff line change
Expand Up @@ -4677,7 +4677,6 @@ def example_field(n, _implementation=_implementation):

# auxiliary_coordinate: Z
c = AuxiliaryCoordinate()
c.nc_set_variable("z")
c.set_geometry("polygon")
b = Bounds()
b.set_properties(
Expand Down
89 changes: 76 additions & 13 deletions cfdm/read_write/netcdf/netcdfwrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -705,7 +705,10 @@ def _write_dimension_coordinate(self, f, key, coord, ncdim, coordinates):

# Create a new dimension coordinate variable
self._write_netcdf_variable(
ncvar, ncdimensions, coord, extra=extra
ncvar,
ncdimensions,
coord,
extra=extra,
)
else:
ncvar = seen[id(coord)]["ncvar"]
Expand Down Expand Up @@ -758,7 +761,7 @@ def _write_count_variable(

extra = {"sample_dimension": sample_ncdim}

# Create a new list variable
# Create a new count variable
self._write_netcdf_variable(
ncvar, (ncdim,), count_variable, extra=extra
)
Expand Down Expand Up @@ -819,7 +822,7 @@ def _write_index_variable(
size=self.implementation.get_data_size(index_variable),
)

# Create a new list variable
# Create a new index variable
extra = {"instance_dimension": instance_dimension}
self._write_netcdf_variable(
ncvar, (ncdim,), index_variable, extra=extra
Expand Down Expand Up @@ -1333,7 +1336,14 @@ def _write_bounds(
omit.append(prop)

# Create the bounds netCDF variable
self._write_netcdf_variable(ncvar, ncdimensions, bounds, omit=omit)
self._write_netcdf_variable(
ncvar,
ncdimensions,
bounds,
omit=omit,
omit_data=self.implementation.get_construct_type(coord)
in g["omit_data"],
)

extra["bounds"] = ncvar
axes = self.implementation.get_construct_data_axes(f, coord_key)
Expand Down Expand Up @@ -1493,7 +1503,13 @@ def _write_node_coordinates(self, coord, coord_ncvar, coord_ncdimensions):
ncvar = self._netcdf_name(ncvar)

# Create the netCDF node coordinates variable
self._write_netcdf_variable(ncvar, (ncdim,), nodes)
self._write_netcdf_variable(
ncvar,
(ncdim,),
nodes,
omit_data=self.implementation.get_construct_type(coord)
in g["omit_data"],
)

encodings = {}

Expand Down Expand Up @@ -1984,7 +2000,13 @@ def _write_interior_ring(self, coord, bounds, encodings):
ncvar = self._netcdf_name(ncvar)

# Create the netCDF interior ring variable
self._write_netcdf_variable(ncvar, (ncdim,), interior_ring)
self._write_netcdf_variable(
ncvar,
(ncdim,),
interior_ring,
omit_data=self.implementation.get_construct_type(coord)
in g["omit_data"],
)

g["part_ncdim"] = ncdim

Expand Down Expand Up @@ -2041,7 +2063,10 @@ def _write_scalar_coordinate(

# Create a new scalar coordinate variable
self._write_netcdf_variable(
ncvar, (), scalar_coord, extra=bounds_extra
ncvar,
(),
scalar_coord,
extra=bounds_extra,
)

else:
Expand Down Expand Up @@ -2134,7 +2159,10 @@ def _write_auxiliary_coordinate(self, f, key, coord, coordinates):
# Create a new auxiliary coordinate variable, if it has data
if self.implementation.get_data(coord, None) is not None:
self._write_netcdf_variable(
ncvar, ncdimensions, coord, extra=extra
ncvar,
ncdimensions,
coord,
extra=extra,
)

g["key_to_ncvar"][key] = ncvar
Expand Down Expand Up @@ -2503,6 +2531,7 @@ def _write_netcdf_variable(
fill=False,
data_variable=False,
domain_variable=False,
omit_data=None,
):
"""Creates a new netCDF variable for a construct.

Expand Down Expand Up @@ -2585,12 +2614,20 @@ def _write_netcdf_variable(

logger.info(f" Writing {cfvar!r}") # pragma: no cover

# Set 'omit_data'
if omit_data is None:
omit_data = (
self.implementation.get_construct_type(cfvar) in g["omit_data"]
)

# ------------------------------------------------------------
# Find the fill value - the value that the variable's data get
# filled with before any data is written. If the fill value is
# False then the variable is not pre-filled.
# ------------------------------------------------------------
if fill or g["post_dry_run"]: # or append mode's appending iteration
if (
omit_data or fill or g["post_dry_run"]
): # or append mode's appending iteration
fill_value = self.implementation.get_property(
cfvar, "_FillValue", None
)
Expand Down Expand Up @@ -2645,9 +2682,8 @@ def _write_netcdf_variable(
"endian": g["endian"],
"chunksizes": chunksizes,
"least_significant_digit": lsd,
"fill_value": fill_value,
}
if fill_value is not None:
kwargs["fill_value"] = fill_value

# Add compression parameters (but not for vlen strings).
if kwargs["datatype"] != str:
Expand Down Expand Up @@ -2706,7 +2742,7 @@ def _write_netcdf_variable(
# scale_factor or add_offset are set as properties on the
# variable.
# ------------------------------------------------------------
if data is not None:
if data is not None and not omit_data:
# Find the missing data values, if any.
_FillValue = self.implementation.get_property(
cfvar, "_FillValue", None
Expand Down Expand Up @@ -4349,6 +4385,7 @@ def write(
warn_valid=True,
group=True,
coordinates=False,
omit_data=None,
):
"""Write field and domain constructs to a netCDF file.

Expand Down Expand Up @@ -4568,6 +4605,13 @@ def write(

.. versionadded:: (cfdm) 1.8.7.0

omit_data: (sequence of) `str`, optional
Do not write the data of the named construct types.

See `cfdm.write` for details.

.. versionadded:: (cfdm) 1.10.0.1

:Returns:

`None`
Expand All @@ -4582,6 +4626,22 @@ def write(
# Expand file name
filename = os.path.expanduser(os.path.expandvars(filename))

# Parse the omit_data parameter
if omit_data is None:
omit_data = ()
elif isinstance(omit_data, str):
omit_data = (omit_data,)

if "all" in omit_data:
omit_data = (
"field",
"field_ancillary",
"domain_ancillary",
"auxiliary_coordinate",
"cell_measure",
"dimension_coordinate",
)

# ------------------------------------------------------------
# Initialise netCDF write parameters
# ------------------------------------------------------------
Expand Down Expand Up @@ -4654,12 +4714,15 @@ def write(
# Dry run: populate 'seen' dict without actually writing to file.
"dry_run": False,
# To indicate if the previous iteration was a dry run:
"post_dry_run": False,
#
# Note: need write_vars keys to specify dry runs (iterations)
# and subsequent runs despite them being implied by the mode ('r'
# and 'a' for dry_run and post_dry_run respectively) so that the
# mode does not need to be passed to various methods, where a
# pair of such keys seem clearer than one "effective mode" key.
"post_dry_run": False,
# Do not write the data of the named construct types.
"omit_data": omit_data,
}

if mode not in ("w", "a", "r+"):
Expand Down
38 changes: 38 additions & 0 deletions cfdm/read_write/write.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def write(
warn_valid=True,
group=True,
coordinates=False,
omit_data=None,
_implementation=_implementation,
):
"""Write field and domain constructs to a netCDF file.
Expand Down Expand Up @@ -493,6 +494,42 @@ def write(

.. versionadded:: (cfdm) 1.8.7.0

omit_data: (sequence of) `str`, optional
sadielbartholomew marked this conversation as resolved.
Show resolved Hide resolved
Do not write the data of the named construct types.

This does not affect the number of netCDF variables and
dimensions that are written to the file, nor the netCDF
variables' attributes, but does not create data on disk
for the requested variables. The resulting file will be
smaller than it otherwise would have been, and when the
new file is read then the data of these variables will be
represented by an array of all missing data.

The *omit_data* parameter may be one, or a sequence, of:

========================== ===============================
*omit_data* Construct types
========================== ===============================
``'field'`` Field constructs
``'field_ancillary'`` Field ancillary constructs
``'domain_ancillary'`` Domain ancillary constructs
``'dimension_coordinate'`` Dimension coordinate constructs
``'auxiliary_coordinate'`` Auxiliary coordinate constructs
``'cell_measure'`` Cell measure constructs
``'all'`` All of the above constructs
========================== ===============================

*Parameter example:*
To omit the data from only field constructs:
``omit_data='field'`` or ``omit_data=['field']``.

*Parameter example:*
To omit the data from domain ancillary and cell measure
constructs: ``omit_data=['domain_ancillary',
'cell_measure']``.

.. versionadded:: (cfdm) 1.10.0.1

_implementation: (subclass of) `CFDMImplementation`, optional
Define the CF data model implementation that defines field
and metadata constructs and their components.
Expand Down Expand Up @@ -541,4 +578,5 @@ def write(
group=group,
coordinates=coordinates,
extra_write_vars=None,
omit_data=omit_data,
)
33 changes: 33 additions & 0 deletions cfdm/test/test_read_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -905,6 +905,39 @@ def test_read_original_filenames(self):
set((cfdm.abspath(parent_file), cfdm.abspath(external_file))),
)

def test_write_omit_data(self):
    """Test the `omit_data` parameter to `write`."""
    f = cfdm.example_field(1)

    # Write without omitting any data
    cfdm.write(f, tmpfile)

    # Omit the data of all construct types
    cfdm.write(f, tmpfile, omit_data="all")
    fields = cfdm.read(tmpfile)
    self.assertEqual(len(fields), 1)
    g = fields[0]

    # The data read back must contain no non-missing values
    self.assertFalse(g.array.count())
    self.assertFalse(g.construct("grid_latitude").array.count())

    # A dump of the field with omitted data must still work
    g.dump(display=False)

    # Omit the data of field and dimension coordinate constructs
    cfdm.write(f, tmpfile, omit_data=("field", "dimension_coordinate"))
    g = cfdm.read(tmpfile)[0]

    # Only the field and dimension coordinate data are missing
    self.assertFalse(g.array.count())
    self.assertFalse(g.construct("grid_latitude").array.count())
    self.assertTrue(g.construct("latitude").array.count())

    # Omit the data of field constructs only
    cfdm.write(f, tmpfile, omit_data="field")
    g = cfdm.read(tmpfile)[0]

    # Only the field data are missing
    self.assertFalse(g.array.count())
    self.assertTrue(g.construct("grid_latitude").array.count())


if __name__ == "__main__":
print("Run date:", datetime.datetime.now())
Expand Down