diff --git a/aospy/automate.py b/aospy/automate.py index 920af25..1c517b5 100644 --- a/aospy/automate.py +++ b/aospy/automate.py @@ -4,6 +4,7 @@ import itertools import logging import pprint +import traceback from .calc import Calc, CalcInterface from .region import Region @@ -241,36 +242,88 @@ def create_calcs(self): self._combine_core_aux_specs()] -def _compute_or_skip_on_error(calc): - """Execute the Calc, catching and logging exceptions, but no re-raise. +def _compute_or_skip_on_error(calc, compute_kwargs): + """Execute the Calc, catching and logging exceptions, but don't re-raise. Prevents one failed calculation from stopping a larger requested set of calculations. """ try: - result = calc.compute() - except RuntimeError as e: - logging.warn(repr(e)) - else: - return result + return calc.compute(**compute_kwargs) + except Exception as e: + msg = ("Skipping aospy calculation `{0}` due to error with the " + "following traceback: \n{1}") + logging.warn(msg.format(calc, traceback.format_exc())) + return None + + +def _exec_calcs(calcs, parallelize=False, **compute_kwargs): + """Execute the given calculations. + Parameters + ---------- + calcs : Sequence of ``aospy.Calc`` objects + parallelize : bool, default False + Whether to submit the calculations in parallel or not + compute_kwargs : dict of keyword arguments passed to ``Calc.compute`` -def exec_calcs(calcs, parallelize=False): + Returns + ------- + A list of the values returned by each Calc object that was executed. + """ if parallelize: pool = multiprocess.Pool() - return pool.map(lambda calc: _compute_or_skip_on_error(calc), calcs) + return pool.map(lambda calc: + _compute_or_skip_on_error(calc, compute_kwargs), + calcs) else: - return [_compute_or_skip_on_error(calc) for calc in calcs] + return [_compute_or_skip_on_error(calc, compute_kwargs) + for calc in calcs] + + +def _print_suite_summary(calc_suite_specs): + """Print summary of requested calculations.""" + return ('\nRequested aospy calculations:\n' + + pprint.pformat(calc_suite_specs) + '\n') -def submit_mult_calcs(calc_suite_specs, parallelize=False, - prompt_verify=False, verbose=True): - """Generate and execute all specified computations.""" +def submit_mult_calcs(calc_suite_specs, exec_options=None): + """Generate and execute all specified computations. + + Parameters + ---------- + calc_suite_specs : dict + The specifications describing the full set of calculations to be + generated and potentially executed. + exec_options : dict or None (default None) + Options regarding how the calculations are reported, submitted, and + saved. If None, default settings are used for all options. Currently + supported options (each should be either `True` or `False`): + + - prompt_verify : If True, print summary of calculations to be + performed and prompt user to confirm before submitting for + execution + - parallelize : If True, submit calculations in parallel + - write_to_tar : If True, write results of calculations to .tar files, + one for each object. These tar files have an identical directory + structures the standard output relative to their root directory, + which is specified via the `tar_direc_out` argument of each Proj + object's instantiation. + + Returns + ------- + A list of the values returned by each Calc object that was executed. + + Raises + ------ + AospyException : if the ``prompt_verify`` option is set to True and the + user does not respond affirmatively to the prompt. + """ + if exec_options is None: + exec_options = dict() + if exec_options.pop('prompt_verify', False): + _print_suite_summary(calc_suite_specs) + _user_verify() calc_suite = CalcSuite(calc_suite_specs) calcs = calc_suite.create_calcs() - if prompt_verify: - print('\nRequested aospy calculations:\n') - pprint.pprint(calc_suite_specs) - print() - _user_verify() - return exec_calcs(calcs, parallelize=parallelize) + return _exec_calcs(calcs, **exec_options) diff --git a/aospy/calc.py b/aospy/calc.py index 96278ef..b529a13 100644 --- a/aospy/calc.py +++ b/aospy/calc.py @@ -56,8 +56,7 @@ def _set_data_attrs(self): def __init__(self, proj=None, model=None, run=None, ens_mem=None, var=None, date_range=None, region=None, intvl_in=None, intvl_out=None, dtype_in_time=None, dtype_in_vert=None, dtype_out_time=None, - dtype_out_vert=None, level=None, time_offset=None, - verbose=True): + dtype_out_vert=None, level=None, time_offset=None): """Instantiate a CalcInterface object. Parameters @@ -165,7 +164,6 @@ def __init__(self, proj=None, model=None, run=None, ens_mem=None, var=None, self.domain = self.var.domain self.def_time = self.var.def_time self.def_vert = self.var.def_vert - self.verbose = verbose try: self.function = self.var.func @@ -627,7 +625,7 @@ def _make_full_mean_eddy_ts(self, data): eddy = self._full_to_yearly_ts(eddy, full_dt) return full, monthly, eddy - def compute(self, save_files=True, save_tar_files=True): + def compute(self, write_to_tar=True): """Perform all desired calculations on the data and save externally.""" data = self._prep_data(self._get_all_data(self.start_date, self.end_date), @@ -639,7 +637,7 @@ def compute(self, save_files=True, save_tar_files=True): logging.info("Writing desired gridded outputs to disk.") for dtype_time, data in reduced.items(): self.save(data, dtype_time, dtype_out_vert=self.dtype_out_vert, - save_files=save_files, save_tar_files=save_tar_files) + save_files=True, write_to_tar=write_to_tar) return self def _save_files(self, data, dtype_out_time): @@ -662,7 +660,7 @@ def _save_files(self, data, dtype_out_time): data_out = xr.Dataset({self.name: data_out}) data_out.to_netcdf(path, engine='scipy') - def _save_tar_files(self, dtype_out_time): + def _write_to_tar(self, dtype_out_time): """Add the data to the tar file in tar_out_direc.""" if not os.path.isdir(self.dir_tar_out): os.makedirs(self.dir_tar_out) @@ -684,10 +682,20 @@ def _save_tar_files(self, dtype_out_time): # The os module treats files on archive as non-empty # directories, so can't use os.remove or os.rmdir. shutil.rmtree(old_data_path) - subprocess.call([ + retcode = subprocess.call([ "tar", "--delete", "--file={}".format(self.path_tar_out), self.file_name[dtype_out_time] ]) + if retcode: + msg = ("The 'tar' command to save your aospy output " + "exited with an error. Most likely, this is due " + "to using an old version of 'tar' (especially if " + "you are on a Mac). Consider installing a newer " + "version of 'tar' or disabling tar output by " + "setting `write_to_tar=False` in the " + "`calc_exec_options` argument of " + "`submit_mult_calcs`.") + logging.warn(msg) with tarfile.open(self.path_tar_out, 'a') as tar: tar.add(self.path_out[dtype_out_time], arcname=self.file_name[dtype_out_time]) @@ -700,13 +708,13 @@ def _update_data_out(self, data, dtype): self.data_out = {dtype: data} def save(self, data, dtype_out_time, dtype_out_vert=False, - save_files=True, save_tar_files=False): + save_files=True, write_to_tar=False): """Save aospy data to data_out attr and to an external file.""" self._update_data_out(data, dtype_out_time) if save_files: self._save_files(data, dtype_out_time) - if save_tar_files and self.proj[0].tar_direc_out: - self._save_tar_files(dtype_out_time) + if write_to_tar and self.proj[0].tar_direc_out: + self._write_to_tar(dtype_out_time) logging.info('\t{}'.format(self.path_out[dtype_out_time])) def _load_from_disk(self, dtype_out_time, dtype_out_vert=False, diff --git a/aospy/test/test_automate.py b/aospy/test/test_automate.py index 68f0061..fb8e426 100644 --- a/aospy/test/test_automate.py +++ b/aospy/test/test_automate.py @@ -9,7 +9,8 @@ _get_all_objs_of_type, _merge_dicts, _input_func_py2_py3, AospyException, _user_verify, CalcSuite, _MODELS_STR, _RUNS_STR, - _VARIABLES_STR, _REGIONS_STR, submit_mult_calcs) + _VARIABLES_STR, _REGIONS_STR, + _compute_or_skip_on_error, submit_mult_calcs) from .data.objects import examples as lib from .data.objects.examples import ( example_proj, example_model, example_run, condensation_rain, @@ -174,14 +175,43 @@ def calcsuite_init_specs_single_calc(calcsuite_init_specs): yield specs # Teardown procedure for direc in [example_proj.direc_out, example_proj.tar_direc_out]: - shutil.rmtree(direc) + shutil.rmtree(direc, ignore_errors=True) -@pytest.mark.parametrize(('parallelize'), [False, True]) -def test_submit_mult_calcs(calcsuite_init_specs_single_calc, parallelize): - calc = submit_mult_calcs(calcsuite_init_specs_single_calc, parallelize)[0] +@pytest.fixture +def calc(calcsuite_init_specs_single_calc): + return CalcSuite(calcsuite_init_specs_single_calc).create_calcs()[0] + + +def test_compute_or_skip_on_error(calc, caplog): + result = _compute_or_skip_on_error(calc, dict(write_to_tar=False)) + assert result is calc + + calc.start_date = 'dummy' + result = _compute_or_skip_on_error(calc, dict(write_to_tar=False)) + log_record = caplog.record_tuples[-1][-1] + assert log_record.startswith("Skipping aospy calculation") + assert result is None + + +@pytest.mark.parametrize( + ('exec_options'), + [dict(parallelize=False, write_to_tar=False), + dict(parallelize=True, write_to_tar=False), + dict(parallelize=False, write_to_tar=True), + dict(parallelize=True, write_to_tar=True), + None]) +def test_submit_mult_calcs(calcsuite_init_specs_single_calc, exec_options): + calc = submit_mult_calcs(calcsuite_init_specs_single_calc, exec_options)[0] assert isfile(calc.path_out['av']) - assert isfile(calc.path_tar_out) + if exec_options is None: + write_to_tar = True + else: + write_to_tar = exec_options.pop('write_to_tar', True) + if write_to_tar: + assert isfile(calc.path_tar_out) + else: + assert not isfile(calc.path_tar_out) @pytest.fixture diff --git a/ci/environment-py27.yml b/ci/environment-py27.yml index 957158e..85de780 100644 --- a/ci/environment-py27.yml +++ b/ci/environment-py27.yml @@ -13,3 +13,4 @@ dependencies: - coveralls - multiprocess - pytest-cov + - pytest-catchlog diff --git a/ci/environment-py34.yml b/ci/environment-py34.yml index e481830..e80c166 100644 --- a/ci/environment-py34.yml +++ b/ci/environment-py34.yml @@ -13,3 +13,4 @@ dependencies: - coveralls - multiprocess - pytest-cov + - pytest-catchlog \ No newline at end of file diff --git a/ci/environment-py35.yml b/ci/environment-py35.yml index 67f97df..a2eab3e 100644 --- a/ci/environment-py35.yml +++ b/ci/environment-py35.yml @@ -13,3 +13,4 @@ dependencies: - coveralls - multiprocess - pytest-cov + - pytest-catchlog \ No newline at end of file diff --git a/ci/environment-py36.yml b/ci/environment-py36.yml index 89325e5..abab91d 100644 --- a/ci/environment-py36.yml +++ b/ci/environment-py36.yml @@ -13,3 +13,4 @@ dependencies: - coveralls - multiprocess - pytest-cov + - pytest-catchlog \ No newline at end of file diff --git a/docs/api.rst b/docs/api.rst index 89664b8..ccc3d41 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -136,7 +136,7 @@ no direct interfacing with ``Calc`` or it's helper class, ``CalcInterface`` is required by the user, in which case this section should be skipped entirely. -Also included is the ``find_obj`` module, which enables aospy e.g. in +Also included is the ``automate`` module, which enables aospy e.g. in the main script to find objects in the user's object library that the user specifies via their string names rather than having to import the objects themselves. @@ -156,10 +156,10 @@ CalcInterface and Calc .. automethod:: aospy.calc.Calc.__init__ -find_obj +automate -------- -.. automodule:: aospy.find_obj +.. automodule:: aospy.automate :members: :undoc-members: diff --git a/docs/whats-new.rst b/docs/whats-new.rst index c52a3aa..ad7ffc7 100644 --- a/docs/whats-new.rst +++ b/docs/whats-new.rst @@ -1,23 +1,49 @@ +.. _whats-new: + What's New ========== +.. _whats-new.0.1.2: + v0.1.2 (XX March 2017) ---------------------- This release improves the process of submitting multiple calculations -for automatic execution: the user interface, documentation, internal -logic, and packaging all received upgrades. +for automatic execution. the user interface, documentation, internal +logic, and packaging all received upgrades and/or bugfixes. Enhancements ~~~~~~~~~~~~ - Improve readability/usability of the included example script ``aospy_main.py`` for submitting aospy calculations by moving all - internal logic into new ``automate.py`` module (fixes :issue:`152` via - :pull:`155`). + internal logic into new ``automate.py`` module (fixes :issue:`152` + via :pull:`155`). By `Spencer Clark + `_ and `Spencer Hill + `_. - Include an example library of aospy objects that works out-of-the-box with the provided example main script (fixes - :issue:`151` via :pull:`155`). + :issue:`151` via :pull:`155`). By `Spencer Clark + `_ and `Spencer Hill + `_. +- Enable user to specify whether or not to write output to .tar files + (in addition to the standard output). Also document an error that + occurs when writing output to .tar files for sufficiently old + versions of tar (including the version that ships standard on + MacOS), and print a warning when errors are caught during the 'tar' + call (fixes one-half of :issue:`157` via :pull:`160`). By `Spencer Hill + `_. + +Bug fixes +~~~~~~~~~ + +- Use the 'scipy' engine for the `xarray.DataArray.to_netcdf + `_ + call when writing aospy calculation outputs to disk to prevent a bug + when trying to re-write to an existing netCDF file (fixes one-half + of :issue:`157` via :pull:`160`). + +.. _whats-new.0.1.1: v0.1.1 (2 March 2017) --------------------- @@ -73,6 +99,8 @@ Bug fixes requested (fixes :issue:`138` through :pull:`139`). By `Spencer Clark `_. +.. _whats-new.0.1: + v0.1 (24 January 2017) ---------------------- - Initial release! diff --git a/examples/aospy_main.py b/examples/aospy_main.py index 2729e52..fc0bf7d 100755 --- a/examples/aospy_main.py +++ b/examples/aospy_main.py @@ -54,6 +54,8 @@ import example_obj_lib as lib +# This dictionary contains all of the specifications of calculations that you +# wish to permute over. calc_suite_specs = dict( # Consult `CalcInterface` API reference for further explanation # of each option and accepted values. @@ -99,15 +101,26 @@ ) -# Submit all calculations in parallel. Requires 'multiprocess' package -# (which can be obtained e.g. via `pip install multiprocess`). -parallelize = False -# List calculations to be performed and prompt for your verification. -prompt_verify = True +# This dictionary contains options regarding how the calculations are displayed +# to you, submitted for execution, and saved upon execution. +calc_exec_options = dict( + # List calculations to be performed and prompt for your verification before + # submitting them for execution. + prompt_verify=True, + + # Submit all calculations in parallel. Requires 'multiprocess' package + # (which can be obtained e.g. via `pip install multiprocess`). + parallelize=False, + + # Save results of calculations to .tar files, one for each Run object. + # These tar files are placed using the same directory structure as the + # standard output relative to their root directory, which is specified via + # the `tar_direc_out` argument of each Proj object's instantiation. + write_to_tar=True, +) # Don't modify this statement. if __name__ == '__main__': - calcs = submit_mult_calcs(calc_suite_specs, parallelize=parallelize, - prompt_verify=prompt_verify) + calcs = submit_mult_calcs(calc_suite_specs, calc_exec_options) diff --git a/setup.py b/setup.py index 8d1fafa..2526280 100644 --- a/setup.py +++ b/setup.py @@ -23,10 +23,16 @@ author_email="shill@atmos.ucla.edu", description="Automated gridded climate data analysis and management", long_description=LONG_DESCRIPTION, - install_requires=['numpy >= 1.7', 'scipy >= 0.16', 'pandas >= 0.15.0', - 'netCDF4 >= 1.2', 'toolz >= 0.7.2', 'dask >= 0.12', - 'xarray >= 0.9.1', 'cloudpickle >= 0.2.1'], - tests_require=['pytest >= 2.7.1'], + install_requires=['numpy >= 1.7', + 'scipy >= 0.16', + 'pandas >= 0.15.0', + 'netCDF4 >= 1.2', + 'toolz >= 0.7.2', + 'dask >= 0.12', + 'xarray >= 0.9.1', + 'cloudpickle >= 0.2.1'], + tests_require=['pytest >= 2.7.1', + 'pytest-catchlog >= 1.0'], package_data={'aospy': ['test/data/netcdf/*.nc']}, license="Apache", keywords="climate science netcdf",