Skip to content

Commit

Permalink
update xarray and intake-esm conda packages (#447)
Browse files Browse the repository at this point in the history
* update xarray and intake-esm conda packages
add ecgtools package to the base environment

* update cf-xarray to 0.8.0 in base and dev env files

* update xarray version to 2023.2.0 in base env file

* replace deprecated from_df call with call to intake_esm_datastore
add required aggregation_control entry to _dummy_esmcol_spec in generate_catalog

* replace axes with axes_values method in xr_parser.MDTFCFDatasetAccessorMixin and dependent methods to avoid issues with the xarray check_coordinate_axis method. The original overriding axes definition resulted in this attribute not being defined in the cf dict that is queried by check_coordinate_axis. If the axes method was set as @property, it was defined, but check_coordinate_axis expects a single value for each key, not a list. Thus, axes_values defines the list of values used by the framework, and axes is correctly instantiated for parsing by xarray

* fix logic in xr_parser._old_axes_dict to deal with 2-D axes assigned to X and Y axis in the cf.coordinates dict
entries that do not match cf.coordinates values in the axes object are deleted from dims_list and coords_list
entries that do match the cf.coordinates values that are missing from dims_list and coords_list are appended
the result is a single value assigned to the X and Y axes instead of, for example, a list with [lat, nlat] or [lon, nlon]

* general cleanup
added more descriptive logging messages for single run teardown method to the environment manager
  • Loading branch information
wrongkindofdoctor authored Feb 16, 2023
1 parent 97c2417 commit da9a0bd
Show file tree
Hide file tree
Showing 9 changed files with 135 additions and 88 deletions.
6 changes: 3 additions & 3 deletions src/conda/_env_synthetic_data.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,16 @@ dependencies:
- numpy=1.22.3
- netCDF4=1.5.8
- cftime>=1.6
- xarray>=2022.06.0
- xarray=2022.11.0
- setuptools>=49.1
- esmf=8.2.0
- esmpy=8.2.0
- xesmf=0.6.2
- python-dateutil >= 2.8.0
- pandas>=1.5
- pytz=2020.4
- pytest >= 6.2.4
- pyyaml >= 6.0
- pytest>=6.2.4
- pyyaml>=6.0
- pip=21.3.1
- pytest>=6.2.4
- pip :
Expand Down
8 changes: 4 additions & 4 deletions src/conda/env_base.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,20 @@ dependencies:
- numpy=1.22.3
- netCDF4=1.5.8
- cftime=1.6
- xarray=0.21.0
- xarray=2023.2.0
# Note: newer versions of cf_xarray are causing issues with missing
# xarray dataset attributes. There seem to be modifications where
# ds.cf attributes are defined later in the process, and this clashes
# with the preprocessing procedures
- cf_xarray=0.5.0
- cf_xarray=0.8.0
- matplotlib=3.5.3
- pandas=1.5
- pint=0.16
- dask=2022.10.0
# additions dec 2020
- ecgtools=2022.10.7
- cfunits=3.3.5
- intake=0.6
- intake-esm=2021.8.17
- intake-esm=2022.9.18
- subprocess32=3.5.4
- pyyaml=6.0
- click=8.0.4
6 changes: 3 additions & 3 deletions src/conda/env_dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ dependencies:
- scipy=1.9
- netCDF4=1.5.8
- cftime=1.6
- xarray=0.21.0
- xarray=2022.11.0
- matplotlib=3.6
- cartopy=0.21.0
- pandas=1.5
Expand All @@ -28,6 +28,6 @@ dependencies:
# additions dec 2020
- cfunits=3.3.1
- intake=0.6
- intake-esm=2021.8.17
- intake-esm=2022.9.18
# bump version 0.3.1 -> 0.4 feb 2021
- cf_xarray=0.4.0
- cf_xarray=0.8.0
2 changes: 1 addition & 1 deletion src/conda/env_python3_base.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ dependencies:
- scipy=1.9
- netCDF4=1.5.8
- cftime=1.6
- xarray=0.21.0
- xarray=2022.11.0
- matplotlib=3.6
- pandas=1.5
- cartopy=0.21.0
Expand Down
12 changes: 9 additions & 3 deletions src/environment_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ def runtime_exception_handler(self, exc):
execution (including setup and clean up).
"""
chained_exc = util.chain_exc(exc, f"running {self.pod.full_name}.",
util.PodExecutionError)
util.PodExecutionError)
self.pod.deactivate(chained_exc)
self.tear_down()
raise exc # include in production, or just for debugging?
Expand All @@ -463,6 +463,8 @@ def tear_down(self, retcode=None):
if hasattr(self.process, 'retcode'):
retcode = self.process.returncode
try:
log_str = f" Tearing down runtime process for {self.pod.full_name})."
self.pod.log.info(log_str)
self.process.kill()
except ProcessLookupError:
pass
Expand All @@ -472,7 +474,11 @@ def tear_down(self, retcode=None):
if retcode == 0:
log_str = f"{self.pod.full_name} exited successfully (code={retcode})."
self.pod.log.info(log_str)
elif retcode is None or self.pod.failed:
elif retcode is None:
log_str = f"{self.pod.full_name} terminated, but the subprocess did not yield a return code." \
f" This does not necessarily indicate a failure."
self.pod.log.info(log_str)
elif self.pod.failed:
log_str = f"{self.pod.full_name} was terminated or exited abnormally."
self.pod.log.info(log_str)
else:
Expand All @@ -483,7 +489,7 @@ def tear_down(self, retcode=None):
if self.pod.log_file is not None:
self.pod.log_file.write(80 * '-' + '\n')
self.pod.log_file.write(log_str + '\n')
self.pod.log_file.flush() # redundant?
self.pod.log_file.flush() # redundant?

if not self.pod.failed:
self.pod.status = core.ObjectStatus.INACTIVE
Expand Down
4 changes: 3 additions & 1 deletion src/output_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,11 @@ def html_templating_dict(pod):
d[attr] = str(getattr(pod, attr, ""))
return d

class HTMLSourceFileMixin():

class HTMLSourceFileMixin:
"""Convienience method to define location of html templates in one place.
"""

@property
def CASE_TEMP_HTML(self):
"""Path to temporary top-level html file for *case* that gets appended
Expand Down
14 changes: 7 additions & 7 deletions src/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,24 +245,24 @@ def process(self, var, ds, *args):

if t_start > dt_start_upper:
err_str = (f"Error: dataset start ({t_start}) is after "
f"requested date range start ({dt_start_upper}).")
f"requested date range start ({dt_start_upper}).")
var.log.error(err_str)
raise IndexError(err_str)
if t_end < dt_end_lower:
err_str = (f"Error: dataset end ({t_end}) is before "
f"requested date range end ({dt_end_lower}).")
f"requested date range end ({dt_end_lower}).")
var.log.error(err_str)
raise IndexError(err_str)

ds = ds.sel({t_coord.name: slice(dt_start_lower, dt_end_upper)})
new_t = ds.cf.dim_axes(tv_name).get('T')
if t_size == new_t.size:
var.log.info(("Requested dates for %s coincide with range of dataset "
"'%s -- %s'; left unmodified."),
var.full_name,
new_t.values[0].strftime('%Y-%m-%d'),
new_t.values[-1].strftime('%Y-%m-%d'),
)
"'%s -- %s'; left unmodified."),
var.full_name,
new_t.values[0].strftime('%Y-%m-%d'),
new_t.values[-1].strftime('%Y-%m-%d'),
)
else:
var.log.info("Cropped date range of %s from '%s -- %s' to '%s -- %s'.",
var.full_name,
Expand Down
18 changes: 11 additions & 7 deletions src/query_fetch_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,10 @@ def _dummy_esmcol_spec(self):
"column_name": self.remote_data_col,
"format": self._asset_file_format
},
"last_updated": "2020-12-06"
"last_updated": "2020-12-06",
'aggregation_control': {
'variable_column_name': 'variable', 'groupby_attrs': []
}
}

@abc.abstractmethod
Expand All @@ -193,9 +196,10 @@ def setup_query(self):
# sep: str = '.', delimiter to use when constructing key for a query
# **kwargs: Any

self.catalog = intake_esm.core.esm_datastore.from_df(
self.generate_catalog(),
esmcol_data=self._dummy_esmcol_spec(),
obj = {'df': self.generate_catalog(), 'esmcat': self._dummy_esmcol_spec()}

self.catalog = intake_esm.core.esm_datastore(
obj,
progressbar=False, sep='|'
)

Expand Down Expand Up @@ -556,7 +560,7 @@ def preprocess_data(self):
self.fetch_data()
update = False
vars_to_process = [
pv for pv in self.iter_vars(active=True) \
pv for pv in self.iter_vars(active=True)
if pv.var.stage < varlistentry_util.VarlistEntryStage.PREPROCESSED
]
if not vars_to_process:
Expand Down Expand Up @@ -609,7 +613,7 @@ def request_data(self):
if p.failed:
p.log.debug('Data request for %s failed.', p.full_name)
else:
p.log.debug('Data request for %s completed succesfully.',
p.log.debug('Data request for %s completed successfully.',
p.full_name)

def query_and_fetch_cleanup(self, signum=None, frame=None):
Expand Down Expand Up @@ -851,7 +855,7 @@ def request_data(self, parent):
for p in self.iter_children():
for v in p.iter_children():
if v.status == core.ObjectStatus.ACTIVE:
v.log.debug('Data request for %s completed succesfully.',
v.log.debug('Data request for %s completed successfully.',
v.full_name)
v.status = core.ObjectStatus.SUCCEEDED
elif v.failed:
Expand Down
Loading

0 comments on commit da9a0bd

Please sign in to comment.