diff --git a/.travis.yml b/.travis.yml index ed82e1b2bff..5ec7a1f62cf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -39,6 +39,8 @@ matrix: env: CONDA_ENV=py36-bottleneck-dev - python: 3.6 env: CONDA_ENV=py36-condaforge-rc + - python: 3.6 + env: CONDA_ENV=py36-pynio-dev allow_failures: - python: 3.6 env: @@ -59,6 +61,8 @@ matrix: env: CONDA_ENV=py36-bottleneck-dev - python: 3.6 env: CONDA_ENV=py36-condaforge-rc + - python: 3.6 + env: CONDA_ENV=py36-pynio-dev before_install: - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then diff --git a/ci/requirements-py36-pynio-dev.yml b/ci/requirements-py36-pynio-dev.yml new file mode 100644 index 00000000000..e19c6537c68 --- /dev/null +++ b/ci/requirements-py36-pynio-dev.yml @@ -0,0 +1,25 @@ +name: test_env +channels: + - conda-forge + - ncar +dependencies: + - python=3.6 + - dask + - distributed + - h5py + - h5netcdf + - matplotlib + - netcdf4 + - pynio=dev + - pytest + - numpy + - pandas + - scipy + - seaborn + - toolz + - rasterio + - bottleneck + - pip: + - coveralls + - pytest-cov + - pydap diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2f754c935df..51583a9da47 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -203,6 +203,7 @@ Bug fixes - ``:py:meth:`~xarray.Dataset.__init__` raises a ``MergeError`` if an coordinate shares a name with a dimension but is comprised of arbitrary dimensions(:issue:`1120`). + - :py:func:`~xarray.open_rasterio` method now skips rasterio.crs -attribute if it is none. By `Leevi Annala `_. @@ -215,6 +216,21 @@ Bug fixes when objects other than ``Dataset`` are provided (:issue:`1555`). By `Joe Hamman `_. +- :py:func:`xarray.concat` would eagerly load dask variables into memory if + the first argument was a numpy variable (:issue:`1588`). + By `Guido Imperiale `_. + +- Fix ``netCDF4`` backend to properly roundtrip the ``shuffle`` encoding option + (:issue:`1606`). + By `Joe Hamman `_. + +- Fix bug when using ``pytest`` class decorators to skip certain unit tests. 
+ The previous behavior unintentionally caused additional tests to be skipped + (:issue:`1531`). By `Joe Hamman `_. + +- Fix pynio backend for upcoming release of pynio with python3 support + (:issue:`1611`). By `Ben Hillman `_. + .. _whats-new.0.9.6: v0.9.6 (8 June 2017) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index f42fa2b16e0..238e9ab4da9 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -153,7 +153,7 @@ def _extract_nc4_variable_encoding(variable, raise_on_invalid=False, safe_to_drop = set(['source', 'original_shape']) valid_encodings = set(['zlib', 'complevel', 'fletcher32', 'contiguous', - 'chunksizes']) + 'chunksizes', 'shuffle']) if lsd_okay: valid_encodings.add('least_significant_digit') diff --git a/xarray/backends/pynio_.py b/xarray/backends/pynio_.py index f70c286a990..df6a17ce374 100644 --- a/xarray/backends/pynio_.py +++ b/xarray/backends/pynio_.py @@ -57,7 +57,7 @@ def open_store_variable(self, name, var): def get_variables(self): with self.ensure_open(autoclose=False): return FrozenOrderedDict((k, self.open_store_variable(k, v)) - for k, v in self.ds.variables.iteritems()) + for k, v in self.ds.variables.items()) def get_attrs(self): with self.ensure_open(autoclose=True): diff --git a/xarray/conventions.py b/xarray/conventions.py index bbf2c02322f..d5394d5b2e1 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -858,12 +858,14 @@ def decode_cf_variable(var, concat_characters=True, mask_and_scale=True, if ('_FillValue' in attributes and not utils.equivalent(attributes['_FillValue'], attributes['missing_value'])): - raise ValueError("Discovered conflicting _FillValue " - "and missing_value. Considering " - "opening the offending dataset using " - "decode_cf=False, corrected the attributes", - "and decoding explicitly using " - "xarray.conventions.decode_cf(ds)") + raise ValueError("Conflicting _FillValue and missing_value " + "attributes on a variable: {} vs. 
{}\n\n" + "Consider opening the offending dataset " + "using decode_cf=False, correcting the " + "attributes and decoding explicitly using " + "xarray.decode_cf()." + .format(attributes['_FillValue'], + attributes['missing_value'])) attributes['_FillValue'] = attributes.pop('missing_value') fill_value = np.array(pop_to(attributes, encoding, '_FillValue')) if fill_value.size > 1: diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 394e7182435..2f97de4a1ba 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -40,9 +40,11 @@ def _dask_or_eager_func(name, eager_module=np, list_of_args=False, """Create a function that dispatches to dask for dask array inputs.""" if has_dask: def f(*args, **kwargs): - dispatch_args = args[0] if list_of_args else args - if any(isinstance(a, da.Array) - for a in dispatch_args[:n_array_args]): + if list_of_args: + dispatch_args = args[0] + else: + dispatch_args = args[:n_array_args] + if any(isinstance(a, da.Array) for a in dispatch_args): module = da else: module = eager_module diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index b5fb02a8083..320dcdd3847 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -236,8 +236,8 @@ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None, raise ValueError('index must be monotonic for resampling') s = pd.Series(np.arange(index.size), index) first_items = s.groupby(grouper).first() + full_index = first_items.index if first_items.isnull().any(): - full_index = first_items.index first_items = first_items.dropna() sbins = first_items.values.astype(np.int64) group_indices = ([slice(i, j) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index d3f448f823c..0ef32601b77 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -5,6 +5,7 @@ from contextlib import contextmanager from distutils.version import LooseVersion import re +import importlib import numpy as np from 
numpy.testing import assert_array_equal @@ -25,111 +26,50 @@ except ImportError: import mock -try: - import scipy - has_scipy = True -except ImportError: - has_scipy = False - -try: - import pydap.client - has_pydap = True -except ImportError: - has_pydap = False - -try: - import netCDF4 - has_netCDF4 = True -except ImportError: - has_netCDF4 = False - - -try: - import h5netcdf - has_h5netcdf = True -except ImportError: - has_h5netcdf = False - - -try: - import Nio - has_pynio = True -except ImportError: - has_pynio = False - - -try: - import dask.array - import dask - dask.set_options(get=dask.get) - has_dask = True -except ImportError: - has_dask = False - - -try: - import matplotlib - has_matplotlib = True -except ImportError: - has_matplotlib = False - -try: - import bottleneck - if LooseVersion(bottleneck.__version__) < LooseVersion('1.1'): - raise ImportError('Fall back to numpy') - has_bottleneck = True -except ImportError: - has_bottleneck = False - -try: - import rasterio - has_rasterio = True -except ImportError: - has_rasterio = False - -try: - import pathlib - has_pathlib = True -except ImportError: +def _importorskip(modname, minversion=None): try: - import pathlib2 - has_pathlib = True + mod = importlib.import_module(modname) + has = True + if minversion is not None: + if LooseVersion(mod.__version__) < LooseVersion(minversion): + raise ImportError('Minimum version not satisfied') except ImportError: - has_pathlib = False - - -# slighly simpler construction that the full functions. 
-# Generally `pytest.importorskip('package')` inline is even easier -requires_matplotlib = pytest.mark.skipif( - not has_matplotlib, reason='requires matplotlib') -requires_scipy = pytest.mark.skipif( - not has_scipy, reason='requires scipy') -requires_pydap = pytest.mark.skipif( - not has_pydap, reason='requires pydap') -requires_netCDF4 = pytest.mark.skipif( - not has_netCDF4, reason='requires netCDF4') -requires_h5netcdf = pytest.mark.skipif( - not has_h5netcdf, reason='requires h5netcdf') -requires_pynio = pytest.mark.skipif( - not has_pynio, reason='requires pynio') -requires_scipy_or_netCDF4 = pytest.mark.skipif( - not has_scipy and not has_netCDF4, reason='requires scipy or netCDF4') -requires_dask = pytest.mark.skipif( - not has_dask, reason='requires dask') -requires_bottleneck = pytest.mark.skipif( - not has_bottleneck, reason='requires bottleneck') -requires_rasterio = pytest.mark.skipif( - not has_rasterio, reason='requires rasterio') -requires_pathlib = pytest.mark.skipif( - not has_pathlib, reason='requires pathlib / pathlib2' -) - + has = False + # TODO: use pytest.skipif instead of unittest.skipUnless + # Using `unittest.skipUnless` is a temporary workaround for pytest#568, + # wherein class decorators stain inherited classes. + # xref: xarray#1531, implemented in xarray #1557. 
+ func = unittest.skipUnless(has, reason='requires {}'.format(modname)) + return has, func + + +has_matplotlib, requires_matplotlib = _importorskip('matplotlib') +has_scipy, requires_scipy = _importorskip('scipy') +has_pydap, requires_pydap = _importorskip('pydap.client') +has_netCDF4, requires_netCDF4 = _importorskip('netCDF4') +has_h5netcdf, requires_h5netcdf = _importorskip('h5netcdf') +has_pynio, requires_pynio = _importorskip('Nio') +has_dask, requires_dask = _importorskip('dask') +has_bottleneck, requires_bottleneck = _importorskip('bottleneck') +has_rasterio, requires_rasterio = _importorskip('rasterio') +has_pathlib, requires_pathlib = _importorskip('pathlib') + +# some special cases +has_scipy_or_netCDF4 = has_scipy or has_netCDF4 +requires_scipy_or_netCDF4 = unittest.skipUnless( + has_scipy_or_netCDF4, reason='requires scipy or netCDF4') +if not has_pathlib: + has_pathlib, requires_pathlib = _importorskip('pathlib2') + +if has_dask: + import dask + dask.set_options(get=dask.get) try: _SKIP_FLAKY = not pytest.config.getoption("--run-flaky") _SKIP_NETWORK_TESTS = not pytest.config.getoption("--run-network-tests") -except ValueError: +except (ValueError, AttributeError): # Can't get config from pytest, e.g., because xarray is installed instead # of being run from a development version (and hence conftests.py is not # available). Don't run flaky tests. 
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a977868c7e6..c7bf5349c7a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -729,6 +729,7 @@ def test_compression_encoding(self): data['var2'].encoding.update({'zlib': True, 'chunksizes': (5, 5), 'fletcher32': True, + 'shuffle': True, 'original_shape': data.var2.shape}) with self.roundtrip(data) as actual: for k, v in iteritems(data['var2'].encoding): @@ -1179,7 +1180,7 @@ def test_encoding_unlimited_dims(self): # tests pending h5netcdf fix -@pytest.mark.xfail +@unittest.skip class H5NetCDFDataTestAutocloseTrue(H5NetCDFDataTest): autoclose = True @@ -1846,6 +1847,11 @@ def test_extract_nc4_variable_encoding(self): encoding = _extract_nc4_variable_encoding(var) self.assertEqual({}, encoding) + # regression test + var = xr.Variable(('x',), [1, 2, 3], {}, {'shuffle': True}) + encoding = _extract_nc4_variable_encoding(var, raise_on_invalid=True) + self.assertEqual({'shuffle': True}, encoding) + def test_extract_h5nc_encoding(self): # not supported with h5netcdf (yet) var = xr.Variable(('x',), [1, 2, 3], {}, diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index a6230761b86..6aaa2cbfa89 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -451,11 +451,9 @@ def test_cf_timedelta(self): ('1us', 'microseconds', np.int64(1)), (['NaT', '0s', '1s'], None, [np.nan, 0, 1]), (['30m', '60m'], 'hours', [0.5, 1.0]), + (np.timedelta64('NaT', 'ns'), 'days', np.nan), + (['NaT', 'NaT'], 'days', [np.nan, np.nan]), ] - if pd.__version__ >= '0.16': - # not quite sure why, but these examples don't work on older pandas - examples.extend([(np.timedelta64('NaT', 'ns'), 'days', np.nan), - (['NaT', 'NaT'], 'days', [np.nan, np.nan])]) for timedeltas, units, numbers in examples: timedeltas = pd.to_timedelta(timedeltas, box=False) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 
a73d3913bde..59b0d302151 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -4,6 +4,8 @@ import pickle from textwrap import dedent + +from distutils.version import LooseVersion import numpy as np import pandas as pd import pytest @@ -12,13 +14,12 @@ from xarray import Variable, DataArray, Dataset import xarray.ufuncs as xu from xarray.core.pycompat import suppress -from . import TestCase, requires_dask +from . import TestCase from xarray.tests import mock -with suppress(ImportError): - import dask - import dask.array as da +dask = pytest.importorskip('dask') +import dask.array as da class DaskTestCase(TestCase): @@ -44,7 +45,6 @@ def assertLazyAnd(self, expected, actual, test): assert False -@requires_dask class TestVariable(DaskTestCase): def assertLazyAndIdentical(self, expected, actual): self.assertLazyAnd(expected, actual, self.assertVariableIdentical) @@ -178,6 +178,8 @@ def test_concat(self): v = self.lazy_var self.assertLazyAndIdentical(u, Variable.concat([v[:2], v[2:]], 'x')) self.assertLazyAndIdentical(u[:2], Variable.concat([v[0], v[1]], 'x')) + self.assertLazyAndIdentical(u[:2], Variable.concat([u[0], v[1]], 'x')) + self.assertLazyAndIdentical(u[:2], Variable.concat([v[0], u[1]], 'x')) self.assertLazyAndIdentical( u[:3], Variable.concat([v[[0, 2]], v[[1]]], 'x', positions=[[0, 2], [1]])) @@ -204,7 +206,6 @@ def test_bivariate_ufunc(self): self.assertLazyAndAllClose(np.maximum(u, 0), xu.maximum(0, v)) -@requires_dask class TestDataArrayAndDataset(DaskTestCase): def assertLazyAndIdentical(self, expected, actual): self.assertLazyAnd(expected, actual, self.assertDataArrayIdentical) @@ -321,6 +322,10 @@ def test_concat_loads_variables(self): assert ds3['c'].data is c3 def test_groupby(self): + if LooseVersion(dask.__version__) == LooseVersion('0.15.3'): + pytest.xfail('upstream bug in dask: ' + 'https://github.com/dask/dask/issues/2718') + u = self.eager_array v = self.lazy_array @@ -542,7 +547,6 @@ def test_from_dask_variable(self): 
self.assertLazyAndIdentical(self.lazy_array, a) -@requires_dask @pytest.mark.parametrize("method", ['load', 'compute']) def test_dask_kwargs_variable(method): x = Variable('y', da.from_array(np.arange(3), chunks=(2,))) @@ -553,7 +557,6 @@ def test_dask_kwargs_variable(method): mock_compute.assert_called_with(foo='bar') -@requires_dask @pytest.mark.parametrize("method", ['load', 'compute', 'persist']) def test_dask_kwargs_dataarray(method): data = da.from_array(np.arange(3), chunks=(2,)) @@ -568,7 +571,6 @@ def test_dask_kwargs_dataarray(method): mock_func.assert_called_with(data, foo='bar') -@requires_dask @pytest.mark.parametrize("method", ['load', 'compute', 'persist']) def test_dask_kwargs_dataset(method): data = da.from_array(np.arange(3), chunks=(2,)) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index e4ab2303fe1..4b1a9d7880f 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -537,13 +537,9 @@ def test_sel_method(self): actual = data.sel(y=['ab', 'ba'], method='pad') self.assertDataArrayIdentical(expected, actual) - if pd.__version__ >= '0.17': - expected = data.sel(x=[1, 2]) - actual = data.sel(x=[0.9, 1.9], method='backfill', tolerance=1) - self.assertDataArrayIdentical(expected, actual) - else: - with self.assertRaisesRegexp(TypeError, 'tolerance'): - data.sel(x=[0.9, 1.9], method='backfill', tolerance=1) + expected = data.sel(x=[1, 2]) + actual = data.sel(x=[0.9, 1.9], method='backfill', tolerance=1) + self.assertDataArrayIdentical(expected, actual) def test_sel_drop(self): data = DataArray([1, 2, 3], [('x', [0, 1, 2])]) @@ -917,10 +913,9 @@ def test_reindex_regressions(self): def test_reindex_method(self): x = DataArray([10, 20], dims='y', coords={'y': [0, 1]}) y = [-0.1, 0.5, 1.1] - if pd.__version__ >= '0.17': - actual = x.reindex(y=y, method='backfill', tolerance=0.2) - expected = DataArray([10, np.nan, np.nan], coords=[('y', y)]) - self.assertDataArrayIdentical(expected, actual) + actual 
= x.reindex(y=y, method='backfill', tolerance=0.2) + expected = DataArray([10, np.nan, np.nan], coords=[('y', y)]) + self.assertDataArrayIdentical(expected, actual) alt = Dataset({'y': y}) actual = x.reindex_like(alt, method='backfill') @@ -2051,7 +2046,7 @@ def test_upsample_nd(self): ('x', 'y', 'time')) self.assertDataArrayIdentical(expected, actual) - @requires_scipy + @requires_scipy def test_upsample_interpolate(self): from scipy.interpolate import interp1d xs = np.arange(6) @@ -2082,6 +2077,15 @@ def test_upsample_interpolate(self): # done here due to floating point arithmetic self.assertDataArrayAllClose(expected, actual, rtol=1e-16) + @requires_scipy + def test_upsample_interpolate_regression_1605(self): + dates = pd.date_range('2016-01-01', '2016-03-31', freq='1D') + expected = xr.DataArray(np.random.random((len(dates), 2, 3)), + dims=('time', 'x', 'y'), + coords={'time': dates}) + actual = expected.resample(time='1D').interpolate('linear') + assert_allclose(actual, expected, rtol=1e-16) + @requires_dask def test_upsample_interpolate_dask(self): import dask.array as da @@ -2807,7 +2811,7 @@ def da_dask(seed=123): da['time'] = times return da - + def test_rolling_iter(da): rolling_obj = da.rolling(time=7) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 85b18e77975..eda3b03a2e5 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1016,24 +1016,21 @@ def test_sel_points(self): method='pad') self.assertDatasetIdentical(expected, actual) - if pd.__version__ >= '0.17': - with self.assertRaises(KeyError): - data.sel_points(x=[2.5], y=[2.0], method='pad', tolerance=1e-3) + with self.assertRaises(KeyError): + data.sel_points(x=[2.5], y=[2.0], method='pad', tolerance=1e-3) def test_sel_method(self): data = create_test_data() - if pd.__version__ >= '0.16': - expected = data.sel(dim2=1) - actual = data.sel(dim2=0.95, method='nearest') - self.assertDatasetIdentical(expected, actual) + expected = 
data.sel(dim2=1) + actual = data.sel(dim2=0.95, method='nearest') + self.assertDatasetIdentical(expected, actual) - if pd.__version__ >= '0.17': - actual = data.sel(dim2=0.95, method='nearest', tolerance=1) - self.assertDatasetIdentical(expected, actual) + actual = data.sel(dim2=0.95, method='nearest', tolerance=1) + self.assertDatasetIdentical(expected, actual) - with self.assertRaises(KeyError): - actual = data.sel(dim2=np.pi, method='nearest', tolerance=0) + with self.assertRaises(KeyError): + actual = data.sel(dim2=np.pi, method='nearest', tolerance=0) expected = data.sel(dim2=[1.5]) actual = data.sel(dim2=[1.45], method='backfill') @@ -1194,13 +1191,9 @@ def test_reindex_method(self): expected = Dataset({'x': ('y', [10, 20, np.nan]), 'y': y}) self.assertDatasetIdentical(expected, actual) - if pd.__version__ >= '0.17': - actual = ds.reindex(y=y, method='backfill', tolerance=0.1) - expected = Dataset({'x': ('y', 3 * [np.nan]), 'y': y}) - self.assertDatasetIdentical(expected, actual) - else: - with self.assertRaisesRegexp(TypeError, 'tolerance'): - ds.reindex(y=y, method='backfill', tolerance=0.1) + actual = ds.reindex(y=y, method='backfill', tolerance=0.1) + expected = Dataset({'x': ('y', 3 * [np.nan]), 'y': y}) + self.assertDatasetIdentical(expected, actual) actual = ds.reindex(y=y, method='pad') expected = Dataset({'x': ('y', [np.nan, 10, 20]), 'y': y}) @@ -2404,10 +2397,6 @@ def test_convert_dataframe_with_many_types_and_multiindex(self): # we can't do perfectly, but we should be at least as faithful as # np.asarray expected = df.apply(np.asarray) - if pd.__version__ < '0.17': - # datetime with timezone dtype is not consistent on old pandas - roundtripped = roundtripped.drop(['h'], axis=1) - expected = expected.drop(['h'], axis=1) assert roundtripped.equals(expected) def test_to_and_from_dict(self): diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index cba03bd8361..bdd0fec777f 100644 --- a/xarray/tests/test_plot.py +++ 
b/xarray/tests/test_plot.py @@ -1036,10 +1036,6 @@ def test_no_args(self): for ax in self.g.axes.flat: self.assertTrue(ax.has_data()) - # default font size should be small - fontsize = ax.title.get_size() - self.assertLessEqual(fontsize, 12) - @pytest.mark.slow def test_names_appear_somewhere(self): self.darray.name = 'testvar'