Skip to content

Commit

Permalink
Make tar output optional and warn about bug w/ old tar versions (#160)
Browse files Browse the repository at this point in the history
* Enable user specification of writing calc output to .tar file or not
* Changed _compute_or_skip_on_error to skip on any exception,
rather than just RuntimeError.
* Use the pytest-catchlog plugin to test logging, and add it to the CI
files and to setup.py
* Replace find_obj.py w/ automate.py in API reference section of docs
* Remove unused CalcInterface 'verbose' and 'write_to_tar' init kwargs
  • Loading branch information
Spencer Hill authored Mar 28, 2017
1 parent c5864c2 commit f585b36
Show file tree
Hide file tree
Showing 11 changed files with 196 additions and 54 deletions.
91 changes: 72 additions & 19 deletions aospy/automate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import itertools
import logging
import pprint
import traceback

from .calc import Calc, CalcInterface
from .region import Region
Expand Down Expand Up @@ -241,36 +242,88 @@ def create_calcs(self):
self._combine_core_aux_specs()]


def _compute_or_skip_on_error(calc):
"""Execute the Calc, catching and logging exceptions, but no re-raise.
def _compute_or_skip_on_error(calc, compute_kwargs):
"""Execute the Calc, catching and logging exceptions, but don't re-raise.
Prevents one failed calculation from stopping a larger requested set
of calculations.
"""
try:
result = calc.compute()
except RuntimeError as e:
logging.warn(repr(e))
else:
return result
return calc.compute(**compute_kwargs)
except Exception as e:
msg = ("Skipping aospy calculation `{0}` due to error with the "
"following traceback: \n{1}")
logging.warn(msg.format(calc, traceback.format_exc()))
return None


def _exec_calcs(calcs, parallelize=False, **compute_kwargs):
"""Execute the given calculations.
Parameters
----------
calcs : Sequence of ``aospy.Calc`` objects
parallelize : bool, default False
Whether to submit the calculations in parallel or not
compute_kwargs : dict of keyword arguments passed to ``Calc.compute``
def exec_calcs(calcs, parallelize=False):
Returns
-------
A list of the values returned by each Calc object that was executed.
"""
if parallelize:
pool = multiprocess.Pool()
return pool.map(lambda calc: _compute_or_skip_on_error(calc), calcs)
return pool.map(lambda calc:
_compute_or_skip_on_error(calc, compute_kwargs),
calcs)
else:
return [_compute_or_skip_on_error(calc) for calc in calcs]
return [_compute_or_skip_on_error(calc, compute_kwargs)
for calc in calcs]


def _print_suite_summary(calc_suite_specs):
"""Print summary of requested calculations."""
return ('\nRequested aospy calculations:\n' +
pprint.pformat(calc_suite_specs) + '\n')


def submit_mult_calcs(calc_suite_specs, parallelize=False,
prompt_verify=False, verbose=True):
"""Generate and execute all specified computations."""
def submit_mult_calcs(calc_suite_specs, exec_options=None):
"""Generate and execute all specified computations.
Parameters
----------
calc_suite_specs : dict
The specifications describing the full set of calculations to be
generated and potentially executed.
exec_options : dict or None (default None)
Options regarding how the calculations are reported, submitted, and
saved. If None, default settings are used for all options. Currently
supported options (each should be either `True` or `False`):
- prompt_verify : If True, print summary of calculations to be
performed and prompt user to confirm before submitting for
execution
- parallelize : If True, submit calculations in parallel
- write_to_tar : If True, write results of calculations to .tar files,
one for each object. These tar files have a directory structure
identical to that of the standard output relative to their root directory,
which is specified via the `tar_direc_out` argument of each Proj
object's instantiation.
Returns
-------
A list of the values returned by each Calc object that was executed.
Raises
------
AospyException : if the ``prompt_verify`` option is set to True and the
user does not respond affirmatively to the prompt.
"""
if exec_options is None:
exec_options = dict()
if exec_options.pop('prompt_verify', False):
_print_suite_summary(calc_suite_specs)
_user_verify()
calc_suite = CalcSuite(calc_suite_specs)
calcs = calc_suite.create_calcs()
if prompt_verify:
print('\nRequested aospy calculations:\n')
pprint.pprint(calc_suite_specs)
print()
_user_verify()
return exec_calcs(calcs, parallelize=parallelize)
return _exec_calcs(calcs, **exec_options)
28 changes: 18 additions & 10 deletions aospy/calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,7 @@ def _set_data_attrs(self):
def __init__(self, proj=None, model=None, run=None, ens_mem=None, var=None,
date_range=None, region=None, intvl_in=None, intvl_out=None,
dtype_in_time=None, dtype_in_vert=None, dtype_out_time=None,
dtype_out_vert=None, level=None, time_offset=None,
verbose=True):
dtype_out_vert=None, level=None, time_offset=None):
"""Instantiate a CalcInterface object.
Parameters
Expand Down Expand Up @@ -165,7 +164,6 @@ def __init__(self, proj=None, model=None, run=None, ens_mem=None, var=None,
self.domain = self.var.domain
self.def_time = self.var.def_time
self.def_vert = self.var.def_vert
self.verbose = verbose

try:
self.function = self.var.func
Expand Down Expand Up @@ -627,7 +625,7 @@ def _make_full_mean_eddy_ts(self, data):
eddy = self._full_to_yearly_ts(eddy, full_dt)
return full, monthly, eddy

def compute(self, save_files=True, save_tar_files=True):
def compute(self, write_to_tar=True):
"""Perform all desired calculations on the data and save externally."""
data = self._prep_data(self._get_all_data(self.start_date,
self.end_date),
Expand All @@ -639,7 +637,7 @@ def compute(self, save_files=True, save_tar_files=True):
logging.info("Writing desired gridded outputs to disk.")
for dtype_time, data in reduced.items():
self.save(data, dtype_time, dtype_out_vert=self.dtype_out_vert,
save_files=save_files, save_tar_files=save_tar_files)
save_files=True, write_to_tar=write_to_tar)
return self

def _save_files(self, data, dtype_out_time):
Expand All @@ -662,7 +660,7 @@ def _save_files(self, data, dtype_out_time):
data_out = xr.Dataset({self.name: data_out})
data_out.to_netcdf(path, engine='scipy')

def _save_tar_files(self, dtype_out_time):
def _write_to_tar(self, dtype_out_time):
"""Add the data to the tar file in ``dir_tar_out``."""
if not os.path.isdir(self.dir_tar_out):
os.makedirs(self.dir_tar_out)
Expand All @@ -684,10 +682,20 @@ def _save_tar_files(self, dtype_out_time):
# The os module treats files on archive as non-empty
# directories, so can't use os.remove or os.rmdir.
shutil.rmtree(old_data_path)
subprocess.call([
retcode = subprocess.call([
"tar", "--delete", "--file={}".format(self.path_tar_out),
self.file_name[dtype_out_time]
])
if retcode:
msg = ("The 'tar' command to save your aospy output "
"exited with an error. Most likely, this is due "
"to using an old version of 'tar' (especially if "
"you are on a Mac). Consider installing a newer "
"version of 'tar' or disabling tar output by "
"setting `write_to_tar=False` in the "
"`calc_exec_options` argument of "
"`submit_mult_calcs`.")
logging.warn(msg)
with tarfile.open(self.path_tar_out, 'a') as tar:
tar.add(self.path_out[dtype_out_time],
arcname=self.file_name[dtype_out_time])
Expand All @@ -700,13 +708,13 @@ def _update_data_out(self, data, dtype):
self.data_out = {dtype: data}

def save(self, data, dtype_out_time, dtype_out_vert=False,
save_files=True, save_tar_files=False):
save_files=True, write_to_tar=False):
"""Save aospy data to data_out attr and to an external file."""
self._update_data_out(data, dtype_out_time)
if save_files:
self._save_files(data, dtype_out_time)
if save_tar_files and self.proj[0].tar_direc_out:
self._save_tar_files(dtype_out_time)
if write_to_tar and self.proj[0].tar_direc_out:
self._write_to_tar(dtype_out_time)
logging.info('\t{}'.format(self.path_out[dtype_out_time]))

def _load_from_disk(self, dtype_out_time, dtype_out_vert=False,
Expand Down
42 changes: 36 additions & 6 deletions aospy/test/test_automate.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
_get_all_objs_of_type, _merge_dicts,
_input_func_py2_py3, AospyException,
_user_verify, CalcSuite, _MODELS_STR, _RUNS_STR,
_VARIABLES_STR, _REGIONS_STR, submit_mult_calcs)
_VARIABLES_STR, _REGIONS_STR,
_compute_or_skip_on_error, submit_mult_calcs)
from .data.objects import examples as lib
from .data.objects.examples import (
example_proj, example_model, example_run, condensation_rain,
Expand Down Expand Up @@ -174,14 +175,43 @@ def calcsuite_init_specs_single_calc(calcsuite_init_specs):
yield specs
# Teardown procedure
for direc in [example_proj.direc_out, example_proj.tar_direc_out]:
shutil.rmtree(direc)
shutil.rmtree(direc, ignore_errors=True)


@pytest.mark.parametrize(('parallelize'), [False, True])
def test_submit_mult_calcs(calcsuite_init_specs_single_calc, parallelize):
calc = submit_mult_calcs(calcsuite_init_specs_single_calc, parallelize)[0]
@pytest.fixture
def calc(calcsuite_init_specs_single_calc):
return CalcSuite(calcsuite_init_specs_single_calc).create_calcs()[0]


def test_compute_or_skip_on_error(calc, caplog):
result = _compute_or_skip_on_error(calc, dict(write_to_tar=False))
assert result is calc

calc.start_date = 'dummy'
result = _compute_or_skip_on_error(calc, dict(write_to_tar=False))
log_record = caplog.record_tuples[-1][-1]
assert log_record.startswith("Skipping aospy calculation")
assert result is None


@pytest.mark.parametrize(
('exec_options'),
[dict(parallelize=False, write_to_tar=False),
dict(parallelize=True, write_to_tar=False),
dict(parallelize=False, write_to_tar=True),
dict(parallelize=True, write_to_tar=True),
None])
def test_submit_mult_calcs(calcsuite_init_specs_single_calc, exec_options):
calc = submit_mult_calcs(calcsuite_init_specs_single_calc, exec_options)[0]
assert isfile(calc.path_out['av'])
assert isfile(calc.path_tar_out)
if exec_options is None:
write_to_tar = True
else:
write_to_tar = exec_options.pop('write_to_tar', True)
if write_to_tar:
assert isfile(calc.path_tar_out)
else:
assert not isfile(calc.path_tar_out)


@pytest.fixture
Expand Down
1 change: 1 addition & 0 deletions ci/environment-py27.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ dependencies:
- coveralls
- multiprocess
- pytest-cov
- pytest-catchlog
1 change: 1 addition & 0 deletions ci/environment-py34.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ dependencies:
- coveralls
- multiprocess
- pytest-cov
- pytest-catchlog
1 change: 1 addition & 0 deletions ci/environment-py35.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ dependencies:
- coveralls
- multiprocess
- pytest-cov
- pytest-catchlog
1 change: 1 addition & 0 deletions ci/environment-py36.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ dependencies:
- coveralls
- multiprocess
- pytest-cov
- pytest-catchlog
6 changes: 3 additions & 3 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ no direct interfacing with ``Calc`` or its helper class,
``CalcInterface`` is required by the user, in which case this section
should be skipped entirely.

Also included is the ``find_obj`` module, which enables aospy e.g. in
Also included is the ``automate`` module, which enables aospy e.g. in
the main script to find objects in the user's object library that the
user specifies via their string names rather than having to import the
objects themselves.
Expand All @@ -156,10 +156,10 @@ CalcInterface and Calc

.. automethod:: aospy.calc.Calc.__init__

find_obj
automate
--------

.. automodule:: aospy.find_obj
.. automodule:: aospy.automate
:members:
:undoc-members:

Expand Down
38 changes: 33 additions & 5 deletions docs/whats-new.rst
Original file line number Diff line number Diff line change
@@ -1,23 +1,49 @@
.. _whats-new:

What's New
==========

.. _whats-new.0.1.2:

v0.1.2 (XX March 2017)
----------------------

This release improves the process of submitting multiple calculations
for automatic execution: the user interface, documentation, internal
logic, and packaging all received upgrades.
for automatic execution. The user interface, documentation, internal
logic, and packaging all received upgrades and/or bugfixes.

Enhancements
~~~~~~~~~~~~

- Improve readability/usability of the included example script
``aospy_main.py`` for submitting aospy calculations by moving all
internal logic into new ``automate.py`` module (fixes :issue:`152` via
:pull:`155`).
internal logic into new ``automate.py`` module (fixes :issue:`152`
via :pull:`155`). By `Spencer Clark
<https://github.com/spencerkclark>`_ and `Spencer Hill
<https://github.com/spencerahill>`_.
- Include an example library of aospy objects that works
out-of-the-box with the provided example main script (fixes
:issue:`151` via :pull:`155`).
:issue:`151` via :pull:`155`). By `Spencer Clark
<https://github.com/spencerkclark>`_ and `Spencer Hill
<https://github.com/spencerahill>`_.
- Enable user to specify whether or not to write output to .tar files
(in addition to the standard output). Also document an error that
occurs when writing output to .tar files for sufficiently old
versions of tar (including the version that ships standard on
MacOS), and print a warning when errors are caught during the 'tar'
call (fixes one-half of :issue:`157` via :pull:`160`). By `Spencer Hill
<https://github.com/spencerahill>`_.

Bug fixes
~~~~~~~~~

- Use the 'scipy' engine for the `xarray.DataArray.to_netcdf
<http://xarray.pydata.org/en/stable/generated/xarray.DataArray.to_netcdf.html?highlight=to_netcdf>`_
call when writing aospy calculation outputs to disk to prevent a bug
when trying to re-write to an existing netCDF file (fixes one-half
of :issue:`157` via :pull:`160`).

.. _whats-new.0.1.1:

v0.1.1 (2 March 2017)
---------------------
Expand Down Expand Up @@ -73,6 +99,8 @@ Bug fixes
requested (fixes :issue:`138` through :pull:`139`). By `Spencer
Clark <https://github.com/spencerkclark>`_.

.. _whats-new.0.1:

v0.1 (24 January 2017)
----------------------
- Initial release!
Expand Down
Loading

0 comments on commit f585b36

Please sign in to comment.