From da9a0bd436273b490246fc754642b82bfbdf9c5a Mon Sep 17 00:00:00 2001
From: Jess <20195932+wrongkindofdoctor@users.noreply.github.com>
Date: Thu, 16 Feb 2023 14:52:26 -0500
Subject: [PATCH] update xarray and intake-esm conda packages (#447)

* update xarray and intake-esm conda packages

add ecgtools package to the base environment

* update cf-xarray to 0.8.0 in base and dev env files

* update xarray version to 2023.2.0 in base env file

* replace deprecated from_df call with a direct call to the
intake_esm.core.esm_datastore constructor

add required aggregation_control entry to _dummy_esmcol_spec in generate_catalog

* replace the axes method with an axes_values method in
xr_parser.MDTFCFDatasetAccessorMixin and dependent methods to avoid issues
with the xarray check_coordinate_axis method. The original overriding axes
definition resulted in this attribute not being defined in the cf dict that
is queried by check_coordinate_axis. If the axes method was set as @property,
it was defined, but check_coordinate_axis expects a single value for each
key, not a list. Thus, axes_values defines the list of values used by the
framework, and axes is correctly instantiated for parsing by xarray

* fix logic in xr_parser._old_axes_dict to deal with 2-D axes assigned to the
X and Y axes in the cf.coordinates dict

entries in the axes object that do not match cf.coordinates values are
deleted from dims_list and coords_list

entries that do match the cf.coordinates values but are missing from
dims_list and coords_list are appended

the result is a single value assigned to the X and Y axes instead of, for
example, a list with [lat, nlat] or [lon, nlon]

* general cleanup; added more descriptive logging messages to the single-run
teardown method in the environment manager
---
 src/conda/_env_synthetic_data.yml |   6 +-
 src/conda/env_base.yml            |   8 +-
 src/conda/env_dev.yml             |   6 +-
 src/conda/env_python3_base.yml    |   2 +-
 src/environment_manager.py        |  12 ++-
 src/output_manager.py             |   4 +-
 src/preprocessor.py               |  14 +--
 src/query_fetch_preprocess.py     |  18 ++--
 src/xr_parser.py                  | 153 ++++++++++++++++++------------
 9 files changed, 135 insertions(+), 88 deletions(-)

diff --git a/src/conda/_env_synthetic_data.yml b/src/conda/_env_synthetic_data.yml
index 2d6d32332..1ccff9174 100644
--- a/src/conda/_env_synthetic_data.yml
+++ b/src/conda/_env_synthetic_data.yml
@@ -8,7 +8,7 @@ dependencies:
 - numpy=1.22.3
 - netCDF4=1.5.8
 - cftime>=1.6
-- xarray>=2022.06.0
+- xarray=2022.11.0
 - setuptools>=49.1
 - esmf=8.2.0
 - esmpy=8.2.0
@@ -16,8 +16,8 @@ dependencies:
 - python-dateutil >= 2.8.0
 - pandas>=1.5
 - pytz=2020.4
-- pytest >= 6.2.4
-- pyyaml >= 6.0
+- pytest>=6.2.4
+- pyyaml>=6.0
 - pip=21.3.1
 - pytest>=6.2.4
 - pip :
diff --git a/src/conda/env_base.yml b/src/conda/env_base.yml
index dde9d775e..9d08c90f3 100644
--- a/src/conda/env_base.yml
+++ b/src/conda/env_base.yml
@@ -12,20 +12,20 @@ dependencies:
 - numpy=1.22.3
 - netCDF4=1.5.8
 - cftime=1.6
-- xarray=0.21.0
+- xarray=2023.2.0
 # Note: newer versions of cf_xarray are causing issues with missing
 # xarray dataset attributes. 
There seem to be modifications where
 # ds.cf attributes are defined later in the process, and this clashes
 # with the preprocessing procedures
-- cf_xarray=0.5.0
+- cf_xarray=0.8.0
 - matplotlib=3.5.3
 - pandas=1.5
 - pint=0.16
 - dask=2022.10.0
-# additions dec 2020
+- ecgtools=2022.10.7
 - cfunits=3.3.5
 - intake=0.6
-- intake-esm=2021.8.17
+- intake-esm=2022.9.18
 - subprocess32=3.5.4
 - pyyaml=6.0
 - click=8.0.4
diff --git a/src/conda/env_dev.yml b/src/conda/env_dev.yml
index 130a523ed..20e955562 100644
--- a/src/conda/env_dev.yml
+++ b/src/conda/env_dev.yml
@@ -10,7 +10,7 @@ dependencies:
 - scipy=1.9
 - netCDF4=1.5.8
 - cftime=1.6
-- xarray=0.21.0
+- xarray=2022.11.0
 - matplotlib=3.6
 - cartopy=0.21.0
 - pandas=1.5
@@ -28,6 +28,6 @@ dependencies:
 # additions dec 2020
 - cfunits=3.3.1
 - intake=0.6
-- intake-esm=2021.8.17
+- intake-esm=2022.9.18
 # bump version 0.3.1 -> 0.4 feb 2021
-- cf_xarray=0.4.0
+- cf_xarray=0.8.0
diff --git a/src/conda/env_python3_base.yml b/src/conda/env_python3_base.yml
index 8c12f23e1..2ff32a038 100644
--- a/src/conda/env_python3_base.yml
+++ b/src/conda/env_python3_base.yml
@@ -12,7 +12,7 @@ dependencies:
 - scipy=1.9
 - netCDF4=1.5.8
 - cftime=1.6
-- xarray=0.21.0
+- xarray=2022.11.0
 - matplotlib=3.6
 - pandas=1.5
 - cartopy=0.21.0
diff --git a/src/environment_manager.py b/src/environment_manager.py
index 6d6cd9a66..59e4f788e 100644
--- a/src/environment_manager.py
+++ b/src/environment_manager.py
@@ -452,7 +452,7 @@ def runtime_exception_handler(self, exc):
         execution (including setup and clean up).
         """
         chained_exc = util.chain_exc(exc, f"running {self.pod.full_name}.",
-            util.PodExecutionError)
+                                     util.PodExecutionError)
         self.pod.deactivate(chained_exc)
         self.tear_down()
         raise exc # include in production, or just for debugging?
@@ -463,6 +463,8 @@ def tear_down(self, retcode=None):
         if hasattr(self.process, 'retcode'):
             retcode = self.process.returncode
         try:
+            log_str = f"Tearing down runtime process for {self.pod.full_name}."
+            self.pod.log.info(log_str)
             self.process.kill()
         except ProcessLookupError:
             pass
@@ -472,7 +474,11 @@ def tear_down(self, retcode=None):
         if retcode == 0:
             log_str = f"{self.pod.full_name} exited successfully (code={retcode})."
             self.pod.log.info(log_str)
-        elif retcode is None or self.pod.failed:
+        elif retcode is None:
+            log_str = f"{self.pod.full_name} terminated, but the subprocess did not yield a return code." \
+                      f" This does not necessarily indicate a failure."
+            self.pod.log.info(log_str)
+        elif self.pod.failed:
             log_str = f"{self.pod.full_name} was terminated or exited abnormally."
             self.pod.log.info(log_str)
         else:
@@ -483,7 +489,7 @@ def tear_down(self, retcode=None):
         if self.pod.log_file is not None:
             self.pod.log_file.write(80 * '-' + '\n')
             self.pod.log_file.write(log_str + '\n')
-            self.pod.log_file.flush() # redundant?
+            self.pod.log_file.flush()  # redundant?
 
         if not self.pod.failed:
             self.pod.status = core.ObjectStatus.INACTIVE
diff --git a/src/output_manager.py b/src/output_manager.py
index d1888232a..f91b83cfe 100644
--- a/src/output_manager.py
+++ b/src/output_manager.py
@@ -28,9 +28,11 @@ def html_templating_dict(pod):
         d[attr] = str(getattr(pod, attr, ""))
     return d
 
-class HTMLSourceFileMixin():
+
+class HTMLSourceFileMixin:
     """Convienience method to define location of html templates in one place. 
""" + @property def CASE_TEMP_HTML(self): """Path to temporary top-level html file for *case* that gets appended diff --git a/src/preprocessor.py b/src/preprocessor.py index f30592bd1..bef2dcae6 100644 --- a/src/preprocessor.py +++ b/src/preprocessor.py @@ -245,12 +245,12 @@ def process(self, var, ds, *args): if t_start > dt_start_upper: err_str = (f"Error: dataset start ({t_start}) is after " - f"requested date range start ({dt_start_upper}).") + f"requested date range start ({dt_start_upper}).") var.log.error(err_str) raise IndexError(err_str) if t_end < dt_end_lower: err_str = (f"Error: dataset end ({t_end}) is before " - f"requested date range end ({dt_end_lower}).") + f"requested date range end ({dt_end_lower}).") var.log.error(err_str) raise IndexError(err_str) @@ -258,11 +258,11 @@ def process(self, var, ds, *args): new_t = ds.cf.dim_axes(tv_name).get('T') if t_size == new_t.size: var.log.info(("Requested dates for %s coincide with range of dataset " - "'%s -- %s'; left unmodified."), - var.full_name, - new_t.values[0].strftime('%Y-%m-%d'), - new_t.values[-1].strftime('%Y-%m-%d'), - ) + "'%s -- %s'; left unmodified."), + var.full_name, + new_t.values[0].strftime('%Y-%m-%d'), + new_t.values[-1].strftime('%Y-%m-%d'), + ) else: var.log.info("Cropped date range of %s from '%s -- %s' to '%s -- %s'.", var.full_name, diff --git a/src/query_fetch_preprocess.py b/src/query_fetch_preprocess.py index a24d7fdf0..25889b27c 100644 --- a/src/query_fetch_preprocess.py +++ b/src/query_fetch_preprocess.py @@ -169,7 +169,10 @@ def _dummy_esmcol_spec(self): "column_name": self.remote_data_col, "format": self._asset_file_format }, - "last_updated": "2020-12-06" + "last_updated": "2020-12-06", + 'aggregation_control': { + 'variable_column_name': 'variable', 'groupby_attrs': [] + } } @abc.abstractmethod @@ -193,9 +196,10 @@ def setup_query(self): # sep: str = '.', delimiter to use when constructing key for a query # **kwargs: Any - self.catalog = intake_esm.core.esm_datastore.from_df( - self.generate_catalog(), - esmcol_data=self._dummy_esmcol_spec(), + obj = {'df': self.generate_catalog(), 'esmcat': self._dummy_esmcol_spec()} + + self.catalog = intake_esm.core.esm_datastore( + obj, progressbar=False, sep='|' ) @@ -556,7 +560,7 @@ def preprocess_data(self): self.fetch_data() update = False vars_to_process = [ - pv for pv in self.iter_vars(active=True) \ + pv for pv in self.iter_vars(active=True) if pv.var.stage < varlistentry_util.VarlistEntryStage.PREPROCESSED ] if not vars_to_process: @@ -609,7 +613,7 @@ def request_data(self): if p.failed: p.log.debug('Data request for %s failed.', p.full_name) else: - p.log.debug('Data request for %s completed succesfully.', + p.log.debug('Data request for %s completed successfully.', p.full_name) def query_and_fetch_cleanup(self, signum=None, frame=None): @@ -851,7 +855,7 @@ def request_data(self, parent): for p in self.iter_children(): for v in p.iter_children(): if v.status == core.ObjectStatus.ACTIVE: - v.log.debug('Data request for %s completed succesfully.', + v.log.debug('Data request for %s completed successfully.', v.full_name) v.status = core.ObjectStatus.SUCCEEDED elif v.failed: diff --git a/src/xr_parser.py b/src/xr_parser.py index c957ffc9a..821210308 100644 --- a/src/xr_parser.py +++ b/src/xr_parser.py @@ -9,6 +9,7 @@ import itertools import re import warnings +from abc import ABC import cftime # believe explict import needed for cf_xarray date parsing? import cf_xarray @@ -40,6 +41,7 @@ are expected, but not present in the data. 
""" + @util.mdtf_dataclass class PlaceholderScalarCoordinate(): """Dummy object used to describe `scalar coordinates @@ -61,6 +63,7 @@ class PlaceholderScalarCoordinate(): # Customize behavior of cf_xarray accessor # (https://github.com/xarray-contrib/cf-xarray, https://cf-xarray.readthedocs.io/en/latest/) + def patch_cf_xarray_accessor(mod): """Monkey-patches ``_get_axis_coord``, a module-level function in `cf_xarray `__, @@ -104,8 +107,10 @@ def new_get_axis_coord(var, key): setattr(mod, func_name, new_get_axis_coord) + patch_cf_xarray_accessor(cf_xarray.accessor) + class MDTFCFAccessorMixin(object): """Properties we add to both xarray Dataset and DataArray objects via the accessor `extension mechanism @@ -179,14 +184,39 @@ def _old_axes_dict(self, var_name=None): else: dims_list = subset_dims - for k,v in vardict.items(): + for k, v in vardict.items(): + # handle variables with 2-D X,Y coordinates (e.g., [lon, nlon], [lat, nlat]) + # TODO: This is kluge-y AF, but it works for now. Will aim to better handle 2-D coordinates + # when refactoring the prepocessor if len(v) > 1 and var_name is not None: - _log.error('Too many %s axes found for %s: %s', k, var_name, v) - raise TypeError(f"Too many {k} axes for {var_name}.") + ax = [c for c in v if c in itertools.chain.from_iterable(axes_obj.cf.coordinates.values())] + del_ax = [d for d in v if d not in itertools.chain.from_iterable(axes_obj.cf.coordinates.values())] + if del_ax is not None: # remove the entries that are not in the cf.coordinates.values dict + # append entries that are in the cf.coordinates.values dict if they are missing in coords_list + # and dims_list + if del_ax[0] in coords_list: + coords_list.remove(del_ax[0]) + if ax[0] not in coords_list: + coords_list.append(ax[0]) + if del_ax[0] in dims_list: + dims_list.remove(del_ax[0]) + if ax[0] not in dims_list: + dims_list.append(ax[0]) + + if ax is not None: + vardict[k] = ax + if ax[0] not in coords_list: + _log.warning(("cf_xarray fix: %s axis %s not in dimensions " + "for %s; dropping."), k, ax[0], var_name) + delete_keys.append(k) + else: + coords_list.remove(ax[0]) + if ax[0] in dims_list: + dims_list.remove(ax[0]) elif len(v) == 1: if v[0] not in coords_list: _log.warning(("cf_xarray fix: %s axis %s not in dimensions " - "for %s; dropping."), k, v[0], var_name) + "for %s; dropping."), k, v[0], var_name) delete_keys.append(k) else: coords_list.remove(v[0]) @@ -199,11 +229,11 @@ def _old_axes_dict(self, var_name=None): # didn't assign all dims for this var if len(dims_list) == 1 and len(empty_keys) == 1: _log.warning('cf_xarray fix: assuming %s is %s axis for %s', - dims_list[0], empty_keys[0], var_name) + dims_list[0], empty_keys[0], var_name) vardict[empty_keys[0]] = [dims_list[0]] else: _log.error(("cf_xarray error: couldn't assign %s to axes for %s" - "(assigned axes: %s)"), dims_list, var_name, vardict) + "(assigned axes: %s)"), dims_list, var_name, vardict) raise TypeError(f"Missing axes for {var_name}.") for k in delete_keys: vardict[k] = [] @@ -219,6 +249,7 @@ def axes_set(self): """Returns a frozenset of all axes names.""" return frozenset(self._obj.cf.axes().keys()) + class MDTFCFDatasetAccessorMixin(MDTFCFAccessorMixin): """Methods we add for xarray Dataset objects via the accessor `extension mechanism @@ -236,7 +267,7 @@ def scalar_coords(self, var_name=None): for ax, coord_names in axes_d.items(): for c in coord_names: if c in ds: - if (c not in ds.dims or (ds[c].size == 1 and ax == 'Z')): + if c not in ds.dims or (ds[c].size == 1 and ax == 'Z'): 
scalars.append(ds[c]) else: if c not in ds.dims: @@ -257,7 +288,7 @@ def get_scalar(self, ax_name, var_name=None): return c return None - def axes(self, var_name=None, filter_set=None): + def axes_values(self, var_name=None, filter_set=None): """Override cf_xarray accessor behavior (from :meth:`~MDTFCFAccessorMixin._old_axes_dict`). @@ -295,7 +326,7 @@ def axes(self, var_name=None, filter_set=None): # we're getting axes for a single variable if len(new_coords) != 1: raise TypeError(f"More than one {ax} axis found for " - f"'{var_name}': {new_coords}.") + f"'{var_name}': {new_coords}.") d[ax] = new_coords[0] else: d[ax] = new_coords @@ -305,7 +336,8 @@ def dim_axes(self, var_name=None): """Override cf_xarray accessor behavior by having values of the 'axes' dict be the Dataset variables themselves, instead of their names. """ - return self.axes(var_name=var_name, filter_set=self._obj.dims) + return self.axes_values(var_name=var_name, filter_set=self._obj.dims) + class MDTFDataArrayAccessorMixin(MDTFCFAccessorMixin): """Methods we add for xarray DataArray objects via the accessor @@ -317,14 +349,14 @@ def dim_axes(self): instead of a list of names as in cf_xarray. Filter on dimension coordinates only (eliminating any scalar coordinates.) """ - return {k:v for k,v in self._obj.cf.axes().items() if v in self._obj.dims} + return {k: v for k, v in self._obj.cf.axes().items() if v in self._obj.dims} def axes(self): """Map axes labels to the (unique) coordinate variable name, instead of a list of names as in cf_xarray. """ d = self._obj.cf._old_axes_dict() - return {k: v[0] for k,v in d.items()} + return {k: v[0] for k, v in d.items()} @property def formula_terms(self): @@ -339,6 +371,7 @@ def formula_terms(self): terms[key] = value return terms + with warnings.catch_warnings(): # cf_xarray registered its accessors under "cf". Re-registering our versions # will work correctly, but raises the following warning, which we suppress. @@ -348,7 +381,7 @@ def formula_terms(self): @xr.register_dataset_accessor("cf") class MDTFCFDatasetAccessor( - MDTFCFDatasetAccessorMixin, cf_xarray.accessor.CFDatasetAccessor + MDTFCFDatasetAccessorMixin, cf_xarray.accessor.CFDatasetAccessor, ABC ): """Accessor that's registered (under the attribute ``cf``) for xarray Datasets. Combines methods in :class:`MDTFCFDatasetAccessorMixin` and the @@ -358,7 +391,7 @@ class MDTFCFDatasetAccessor( @xr.register_dataarray_accessor("cf") class MDTFCFDataArrayAccessor( - MDTFDataArrayAccessorMixin, cf_xarray.accessor.CFDataArrayAccessor + MDTFDataArrayAccessorMixin, cf_xarray.accessor.CFDataArrayAccessor, ABC ): """Accessor that's registered (under the attribute ``cf``) for xarray DataArrays. Combines methods in :class:`MDTFDataArrayAccessorMixin` and @@ -368,7 +401,8 @@ class MDTFCFDataArrayAccessor( # ======================================================================== -class DefaultDatasetParser(): + +class DefaultDatasetParser: """Class containing MDTF-specific methods for cleaning and normalizing xarray metadata. @@ -406,7 +440,7 @@ def setup(self, data_mgr, pod): # --- Methods for initial munging, prior to xarray.decode_cf ------------- def guess_attr(self, attr_desc, attr_name, options, default=None, - comparison_func=None): + comparison_func=None): """Select and return element of *options* equal to *attr_name*. If none are equal, try a case-insensititve string match. 
@@ -442,8 +476,8 @@ def str_munge(s): if test_count >= 1: return attr_name munged_opts = [ - (comparison_func(str_munge(opt), str_munge(attr_name)), opt) \ - for opt in options + (comparison_func(str_munge(opt), str_munge(attr_name)), opt) + for opt in options ] if sum(tup[0] for tup in munged_opts) == 1: guessed_attr = [tup[1] for tup in munged_opts if tup[0]][0] @@ -531,10 +565,11 @@ def normalize_pre_decode(self, ds): """ def strip_(v): # strip leading, trailing whitespace from all string-valued attributes - return (v.strip() if isinstance(v, str) else v) + return v.strip() if isinstance(v, str) else v + def strip_attrs(obj): d = getattr(obj, 'attrs', dict()) - return {strip_(k): strip_(v) for k,v in d.items()} + return {strip_(k): strip_(v) for k, v in d.items()} setattr(ds, 'attrs', strip_attrs(ds)) for var in ds.variables: @@ -622,7 +657,7 @@ def normalize_dependent_var(self, var, ds): return if not self.guess_names: raise util.MetadataError(f"Variable name '{ds_var_name}' not found " - f"in dataset: ({list(ds.variables)}).") + f"in dataset: ({list(ds.variables)}).") var_names = list(ds.variables.keys()) coord_names = set(ds.coords.keys()) @@ -730,20 +765,20 @@ def compare_attr(self, our_attr_tuple, ds_attr_tuple, comparison_func=None, else: # don't change ds, raise exception raise util.MetadataError((f"No {ds_attr_name} set for '{ds_var.name}'; " - f"expected value '{our_attr}'.")) + f"expected value '{our_attr}'.")) if our_attr is util.NOTSET or (not our_attr): # our_attr wasn't defined if fill_ours: if not (our_attr_name == 'name' and our_var.name == ds_attr): self.log.debug("Updating %s for '%s' to value '%s' from dataset.", - our_attr_name, our_var.name, ds_attr) + our_attr_name, our_var.name, ds_attr) setattr(our_var, our_attr_name, ds_attr) return else: # don't change ds, raise exception raise util.MetadataError((f"No {our_attr_name} set for '{our_var.name}'; " - f"value '{ds_attr}' found in dataset.")) + f"value '{ds_attr}' found in dataset.")) if not comparison_func(our_attr, ds_attr): # both attrs present, but have different values @@ -752,11 +787,11 @@ def compare_attr(self, our_attr_tuple, ds_attr_tuple, comparison_func=None, # update our attr with value from ds, but also raise error setattr(our_var, our_attr_name, ds_attr) raise util.MetadataEvent((f"Unexpected {our_attr_name} for variable " - f"'{our_var.name}': '{ds_attr}' (expected '{our_attr}').")) + f"'{our_var.name}': '{ds_attr}' (expected '{our_attr}').")) elif overwrite_ours: # set our attr to ds value self.log.debug("Updating %s for '%s' to value '%s' from dataset.", - our_attr_name, our_var.name, ds_attr) + our_attr_name, our_var.name, ds_attr) setattr(our_var, our_attr_name, ds_attr) return else: @@ -789,7 +824,7 @@ def reconcile_name(self, our_var, ds_var_name, overwrite_ours=None): ) def reconcile_attr(self, our_var, ds_var, our_attr_name, ds_attr_name=None, - **kwargs): + **kwargs): """Compare attribute of a :class:`~src.data_model.DMVariable` (*our_var*) with what's set in the xarray.Dataset (*ds_var*). 
""" @@ -825,25 +860,25 @@ def reconcile_names(self, our_var, ds, ds_var_name, overwrite_ours=None): if len(ds_names) == 1: # success, narrowed down to one guess self.log.info(("Selecting '%s' as the intended name for '%s' " - "(= %s; expected '%s')."), ds_names[0], our_var.name, - our_var.standard_name, ds_var_name, - tags=util.ObjectLogTag.BANNER) + "(= %s; expected '%s')."), ds_names[0], our_var.name, + our_var.standard_name, ds_var_name, + tags=util.ObjectLogTag.BANNER) ds_var_name = ds_names[0] overwrite_ours = True # always overwrite for this case else: # failure raise util.MetadataError(f"Variable name '{ds_var_name}' not " - f"found in dataset: ({list(ds.variables)}).") + f"found in dataset: ({list(ds.variables)}).") else: # not guessing; error out raise util.MetadataError(f"Variable name '{ds_var_name}' not found " - f"in dataset: ({list(ds.variables)}).") + f"in dataset: ({list(ds.variables)}).") # in all non-error cases: now that variable has been identified in ds, # straightforward to compare attrs self.reconcile_name(our_var, ds_var_name, overwrite_ours=overwrite_ours) self.reconcile_attr(our_var, ds[ds_var_name], 'standard_name', - fill_ours=True, fill_ds=True) + fill_ours=True, fill_ds=True) def reconcile_units(self, our_var, ds_var): """Reconcile the units attribute between the 'ground truth' of the @@ -857,19 +892,19 @@ def reconcile_units(self, our_var, ds_var): """ # will raise UnitsUndefinedError or log warning if unit attribute missing self.check_metadata(ds_var, 'units') - # Check equivalence of units: if units inequivalent, raise MetadataEvent + # Check equivalence of units: if units are not equivalent, raise MetadataEvent self.reconcile_attr(our_var, ds_var, 'units', - comparison_func=units.units_equivalent, - fill_ours=True, fill_ds=True - ) + comparison_func=units.units_equivalent, + fill_ours=True, fill_ds=True + ) # If that passed, check equality of units. Log unequal units as a warning. # not an exception, since preprocessor can/will convert them. try: # test units only, not quantities+units self.reconcile_attr(our_var, ds_var, 'units', - comparison_func=units.units_equal, - fill_ours=True, fill_ds=True - ) + comparison_func=units.units_equal, + fill_ours=True, fill_ds=True + ) except util.MetadataEvent as exc: self.log.warning("%s %r.", util.exc_descriptor(exc), exc) our_var.units = units.to_cfunits(our_var.units) @@ -889,16 +924,16 @@ def reconcile_time_units(self, our_var, ds_var): self.check_metadata(ds_var, 'units') # Check equivalence of units: if units inequivalent, raise MetadataEvent self.reconcile_attr(our_var, ds_var, 'units', - comparison_func=units.units_reftime_base_eq, - fill_ours=True, fill_ds=False, overwrite_ours=None - ) + comparison_func=units.units_reftime_base_eq, + fill_ours=True, fill_ds=False, overwrite_ours=None + ) self.reconcile_attr(our_var, ds_var, 'units', - comparison_func=units.units_equal, - fill_ours=True, fill_ds=False, overwrite_ours=True - ) + comparison_func=units.units_equal, + fill_ours=True, fill_ds=False, overwrite_ours=True + ) self.reconcile_attr(our_var, ds_var, 'calendar', - fill_ours=True, fill_ds=False, overwrite_ours=True - ) + fill_ours=True, fill_ds=False, overwrite_ours=True + ) def reconcile_scalar_value_and_units(self, our_var, ds_var): """Compare scalar coordinate value of a :class:`~src.data_model.DMVariable` @@ -1005,8 +1040,8 @@ def reconcile_dimension_coords(self, our_var, ds): of the dataset variable, according to the data request. ds: xarray Dataset. 
""" - for coord in ds.cf.axes(our_var.name).values(): - # .axes() will have thrown TypeError if XYZT axes not all uniquely defined + for coord in ds.cf.axes_values(our_var.name).values(): + # .axes_values() will have thrown TypeError if XYZT axes not all uniquely defined assert isinstance(coord, xr.core.dataarray.DataArray) # check set of dimension coordinates (array dimensionality) agrees @@ -1029,14 +1064,14 @@ def reconcile_dimension_coords(self, our_var, ds): self.reconcile_coord_bounds(coord, ds, ds_coord_name) else: _log.warning(f"Variable {our_var.name} has unexpected dimensionality: " - f" expected axes {list(our_axes_set)}, got {list(ds_axes_set)}.") + f" expected axes {list(our_axes_set)}, got {list(ds_axes_set)}.") for c_name in ds_var.dims: if ds[c_name].size == 1: if c_name == ds_axes['Z']: # mis-identified scalar coordinate self.log.warning(("Dataset has dimension coordinate '%s' of size " - "1 not identified as scalar coord."), c_name) + "1 not identified as scalar coord."), c_name) else: # encounter |X|,|Y| = 1 for single-column models; regardless, # assume user knows what they're doing @@ -1122,9 +1157,9 @@ def _get_calendar(d): self.normalize_calendar(d) return d.get('calendar', None) - t_coords = ds.cf.axes().get('T', []) + t_coords = ds.cf.axes_values().get('T', []) if not t_coords: - return # assume static data + return # assume static data elif len(t_coords) > 1: self.log.error("Found multiple time axes. Ignoring all but '%s'.", t_coords[0].name) @@ -1215,10 +1250,10 @@ def parse(self, var, ds): self.log = var.log self.normalize_pre_decode(ds) ds = xr.decode_cf(ds, - decode_coords=True, # parse coords attr - decode_times=True, - use_cftime=True # use cftime instead of np.datetime64 - ) + decode_coords=True, # parse coords attr + decode_times=True, + use_cftime=True # use cftime instead of np.datetime64 + ) ds = ds.cf.guess_coord_axis() self.restore_attrs_backup(ds) self.normalize_metadata(var, ds) @@ -1226,7 +1261,7 @@ def parse(self, var, ds): self._post_normalize_hook(var, ds) if self.disable: - return ds # stop here; don't attempt to reconcile + return ds # stop here; don't attempt to reconcile if var is not None: self.reconcile_variable(var, ds) self.check_ds_attrs(var, ds)