Skip to content

Commit

Permalink
update xarray and intake-esm conda packages (#447)
Browse files Browse the repository at this point in the history
* update xarray and intake-esm conda packages
add ecgtools package to the base environment

* update cf-xarray to 0.8.0 in base and dev env files

* update xarray version to 2023.2.0 in base env file

* replace deprecated from_df call with call to intake_esm_datastore
add required aggregation_control entry to _dummy_esmcol_spec in generate_catalog

* replace axes with axes_values method in xr_parser.MDTFCFDatasetAccessorMixin and dependent methods to avoid issues with the xarray check_coordinate_axis method. The original overriding axes definition resulted in this attribute not being defined in the cf dict that is queried by check_coordinate_axis. If the axes method was set as @property, it was defined, but check_coordinate_axis expects a single value for each key, not a list. Thus, axes_values defines the list of values used by the framework, and axes is correctly instantiated for parsing by xarray

* fix logic in xr_parser._old_axes_dict to deal with 2-D axes assigned to X and Y axis in the cf.coordinates dict
entries that do not match cf.coordinates values in the axes object are deleted from dims_list and coords_list
entries that do match the cf.coordinates values that are missing from dims_list and coords_list are appended
the result is a single value assigned to the X and Y axes instead of, for example, a list with [lat, nlat] or [lon, nlon]

* general cleanup
added more descriptive logging messages for single run teardown method to the environment manager
  • Loading branch information
wrongkindofdoctor authored Feb 16, 2023
1 parent 97c2417 commit da9a0bd
Show file tree
Hide file tree
Showing 9 changed files with 135 additions and 88 deletions.
6 changes: 3 additions & 3 deletions src/conda/_env_synthetic_data.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,16 @@ dependencies:
- numpy=1.22.3
- netCDF4=1.5.8
- cftime>=1.6
- xarray>=2022.06.0
- xarray=2022.11.0
- setuptools>=49.1
- esmf=8.2.0
- esmpy=8.2.0
- xesmf=0.6.2
- python-dateutil >= 2.8.0
- pandas>=1.5
- pytz=2020.4
- pytest >= 6.2.4
- pyyaml >= 6.0
- pytest>=6.2.4
- pyyaml>=6.0
- pip=21.3.1
- pytest>=6.2.4
- pip :
Expand Down
8 changes: 4 additions & 4 deletions src/conda/env_base.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,20 @@ dependencies:
- numpy=1.22.3
- netCDF4=1.5.8
- cftime=1.6
- xarray=0.21.0
- xarray=2023.2.0
# Note: newer versions of cf_xarray are causing issues with missing
# xarray dataset attributes. There seem to be modifications where
# ds.cf attributes are defined later in the process, and this clashes
# with the preprocessing procedures
- cf_xarray=0.5.0
- cf_xarray=0.8.0
- matplotlib=3.5.3
- pandas=1.5
- pint=0.16
- dask=2022.10.0
# additions dec 2020
- ecgtools=2022.10.7
- cfunits=3.3.5
- intake=0.6
- intake-esm=2021.8.17
- intake-esm=2022.9.18
- subprocess32=3.5.4
- pyyaml=6.0
- click=8.0.4
6 changes: 3 additions & 3 deletions src/conda/env_dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ dependencies:
- scipy=1.9
- netCDF4=1.5.8
- cftime=1.6
- xarray=0.21.0
- xarray=2022.11.0
- matplotlib=3.6
- cartopy=0.21.0
- pandas=1.5
Expand All @@ -28,6 +28,6 @@ dependencies:
# additions dec 2020
- cfunits=3.3.1
- intake=0.6
- intake-esm=2021.8.17
- intake-esm=2022.9.18
# bump version 0.3.1 -> 0.4 feb 2021
- cf_xarray=0.4.0
- cf_xarray=0.8.0
2 changes: 1 addition & 1 deletion src/conda/env_python3_base.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ dependencies:
- scipy=1.9
- netCDF4=1.5.8
- cftime=1.6
- xarray=0.21.0
- xarray=2022.11.0
- matplotlib=3.6
- pandas=1.5
- cartopy=0.21.0
Expand Down
12 changes: 9 additions & 3 deletions src/environment_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ def runtime_exception_handler(self, exc):
execution (including setup and clean up).
"""
chained_exc = util.chain_exc(exc, f"running {self.pod.full_name}.",
util.PodExecutionError)
util.PodExecutionError)
self.pod.deactivate(chained_exc)
self.tear_down()
raise exc # include in production, or just for debugging?
Expand All @@ -463,6 +463,8 @@ def tear_down(self, retcode=None):
if hasattr(self.process, 'retcode'):
retcode = self.process.returncode
try:
log_str = f" Tearing down runtime process for {self.pod.full_name})."
self.pod.log.info(log_str)
self.process.kill()
except ProcessLookupError:
pass
Expand All @@ -472,7 +474,11 @@ def tear_down(self, retcode=None):
if retcode == 0:
log_str = f"{self.pod.full_name} exited successfully (code={retcode})."
self.pod.log.info(log_str)
elif retcode is None or self.pod.failed:
elif retcode is None:
log_str = f"{self.pod.full_name} terminated, but the subprocess did not yield a return code." \
f" This does not necessarily indicate a failure."
self.pod.log.info(log_str)
elif self.pod.failed:
log_str = f"{self.pod.full_name} was terminated or exited abnormally."
self.pod.log.info(log_str)
else:
Expand All @@ -483,7 +489,7 @@ def tear_down(self, retcode=None):
if self.pod.log_file is not None:
self.pod.log_file.write(80 * '-' + '\n')
self.pod.log_file.write(log_str + '\n')
self.pod.log_file.flush() # redundant?
self.pod.log_file.flush() # redundant?

if not self.pod.failed:
self.pod.status = core.ObjectStatus.INACTIVE
Expand Down
4 changes: 3 additions & 1 deletion src/output_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,11 @@ def html_templating_dict(pod):
d[attr] = str(getattr(pod, attr, ""))
return d

class HTMLSourceFileMixin():

class HTMLSourceFileMixin:
"""Convienience method to define location of html templates in one place.
"""

@property
def CASE_TEMP_HTML(self):
"""Path to temporary top-level html file for *case* that gets appended
Expand Down
14 changes: 7 additions & 7 deletions src/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,24 +245,24 @@ def process(self, var, ds, *args):

if t_start > dt_start_upper:
err_str = (f"Error: dataset start ({t_start}) is after "
f"requested date range start ({dt_start_upper}).")
f"requested date range start ({dt_start_upper}).")
var.log.error(err_str)
raise IndexError(err_str)
if t_end < dt_end_lower:
err_str = (f"Error: dataset end ({t_end}) is before "
f"requested date range end ({dt_end_lower}).")
f"requested date range end ({dt_end_lower}).")
var.log.error(err_str)
raise IndexError(err_str)

ds = ds.sel({t_coord.name: slice(dt_start_lower, dt_end_upper)})
new_t = ds.cf.dim_axes(tv_name).get('T')
if t_size == new_t.size:
var.log.info(("Requested dates for %s coincide with range of dataset "
"'%s -- %s'; left unmodified."),
var.full_name,
new_t.values[0].strftime('%Y-%m-%d'),
new_t.values[-1].strftime('%Y-%m-%d'),
)
"'%s -- %s'; left unmodified."),
var.full_name,
new_t.values[0].strftime('%Y-%m-%d'),
new_t.values[-1].strftime('%Y-%m-%d'),
)
else:
var.log.info("Cropped date range of %s from '%s -- %s' to '%s -- %s'.",
var.full_name,
Expand Down
18 changes: 11 additions & 7 deletions src/query_fetch_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,10 @@ def _dummy_esmcol_spec(self):
"column_name": self.remote_data_col,
"format": self._asset_file_format
},
"last_updated": "2020-12-06"
"last_updated": "2020-12-06",
'aggregation_control': {
'variable_column_name': 'variable', 'groupby_attrs': []
}
}

@abc.abstractmethod
Expand All @@ -193,9 +196,10 @@ def setup_query(self):
# sep: str = '.', delimiter to use when constructing key for a query
# **kwargs: Any

self.catalog = intake_esm.core.esm_datastore.from_df(
self.generate_catalog(),
esmcol_data=self._dummy_esmcol_spec(),
obj = {'df': self.generate_catalog(), 'esmcat': self._dummy_esmcol_spec()}

self.catalog = intake_esm.core.esm_datastore(
obj,
progressbar=False, sep='|'
)

Expand Down Expand Up @@ -556,7 +560,7 @@ def preprocess_data(self):
self.fetch_data()
update = False
vars_to_process = [
pv for pv in self.iter_vars(active=True) \
pv for pv in self.iter_vars(active=True)
if pv.var.stage < varlistentry_util.VarlistEntryStage.PREPROCESSED
]
if not vars_to_process:
Expand Down Expand Up @@ -609,7 +613,7 @@ def request_data(self):
if p.failed:
p.log.debug('Data request for %s failed.', p.full_name)
else:
p.log.debug('Data request for %s completed succesfully.',
p.log.debug('Data request for %s completed successfully.',
p.full_name)

def query_and_fetch_cleanup(self, signum=None, frame=None):
Expand Down Expand Up @@ -851,7 +855,7 @@ def request_data(self, parent):
for p in self.iter_children():
for v in p.iter_children():
if v.status == core.ObjectStatus.ACTIVE:
v.log.debug('Data request for %s completed succesfully.',
v.log.debug('Data request for %s completed successfully.',
v.full_name)
v.status = core.ObjectStatus.SUCCEEDED
elif v.failed:
Expand Down
Loading

0 comments on commit da9a0bd

Please sign in to comment.