Skip to content

Commit

Permalink
Merge branch 'master' into file-manager-del
Browse files Browse the repository at this point in the history
  • Loading branch information
shoyer committed Dec 23, 2018
2 parents fd12b18 + 9352b3c commit b2bb60e
Show file tree
Hide file tree
Showing 16 changed files with 312 additions and 53 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ matrix:
- env: CONDA_ENV=py36-bottleneck-dev
- env: CONDA_ENV=py36-condaforge-rc
- env: CONDA_ENV=py36-pynio-dev
- env: CONDA_ENV=py36-rasterio-0.36
- env: CONDA_ENV=py36-rasterio
- env: CONDA_ENV=py36-zarr-dev
- env: CONDA_ENV=docs
- env: CONDA_ENV=py36-hypothesis
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ dependencies:
- scipy
- seaborn
- toolz
- rasterio=0.36.0
- rasterio>=1.0
- bottleneck
- pip:
- coveralls
Expand Down
2 changes: 1 addition & 1 deletion doc/installing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ For netCDF and IO
for accessing CAMx, GEOS-Chem (bpch), NOAA ARL files, ICARTT files
(ffi1001) and many others.
- `rasterio <https://github.com/mapbox/rasterio>`__: for reading GeoTiffs and
other gridded raster datasets.
other gridded raster datasets. (version 1.0 or later)
- `iris <https://github.com/scitools/iris>`__: for conversion to and from iris'
Cube objects
- `cfgrib <https://github.com/ecmwf/cfgrib>`__: for reading GRIB files via the
Expand Down
19 changes: 19 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@ v0.11.1 (unreleased)
Breaking changes
~~~~~~~~~~~~~~~~

- Minimum rasterio version increased from 0.36 to 1.0 (for ``open_rasterio``)
- Time bounds variables are now also decoded according to CF conventions
(:issue:`2565`). The previous behavior was to decode them only if they
had specific time attributes, now these attributes are copied
automatically from the corresponding time coordinate. This might
break downstream code that was relying on these variables not
being decoded.
By `Fabien Maussion <https://github.com/fmaussion>`_.

Enhancements
~~~~~~~~~~~~

Expand All @@ -41,6 +50,10 @@ Enhancements
- :py:class:`CFTimeIndex` uses slicing for string indexing when possible (like
:py:class:`pandas.DatetimeIndex`), which avoids unnecessary copies.
By `Stephan Hoyer <https://github.com/shoyer>`_
- Enable passing ``rasterio.io.DatasetReader`` or ``rasterio.vrt.WarpedVRT`` to
``open_rasterio`` instead of file path string. Allows for in-memory
reprojection, see (:issue:`2588`).
By `Scott Henderson <https://github.com/scottyhq>`_.
- Like :py:class:`pandas.DatetimeIndex`, :py:class:`CFTimeIndex` now supports
"dayofyear" and "dayofweek" accessors (:issue:`2597`). By `Spencer Clark
<https://github.com/spencerkclark>`_.
Expand All @@ -50,6 +63,12 @@ Enhancements
recommend enabling it in your test suites if you use xarray for IO.
By `Stephan Hoyer <https://github.com/shoyer>`_
- Support Dask ``HighLevelGraphs`` by `Matthew Rocklin <https://matthewrocklin.com>`_.
- :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` now supports the
``loffset`` kwarg just like Pandas.
By `Deepak Cherian <https://github.com/dcherian>`_
- 0d slices of ndarrays are now obtained directly through indexing, rather than
extracting and wrapping a scalar, avoiding unnecessary copying. By `Daniel
Wennberg <https://github.com/danielwe>`_.

Bug fixes
~~~~~~~~~
Expand Down
37 changes: 28 additions & 9 deletions xarray/backends/rasterio_.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import warnings
from collections import OrderedDict
from distutils.version import LooseVersion

import numpy as np

from .. import DataArray
Expand All @@ -24,13 +23,16 @@
class RasterioArrayWrapper(BackendArray):
"""A wrapper around rasterio dataset objects"""

def __init__(self, manager, lock):
def __init__(self, manager, lock, vrt_params=None):
from rasterio.vrt import WarpedVRT
self.manager = manager
self.lock = lock

# cannot save riods as an attribute: this would break pickleability
riods = manager.acquire()

if vrt_params is not None:
riods = WarpedVRT(riods, **vrt_params)
self.vrt_params = vrt_params
self._shape = (riods.count, riods.height, riods.width)

dtypes = riods.dtypes
Expand Down Expand Up @@ -104,6 +106,7 @@ def _get_indexer(self, key):
return band_key, tuple(window), tuple(squeeze_axis), tuple(np_inds)

def _getitem(self, key):
from rasterio.vrt import WarpedVRT
band_key, window, squeeze_axis, np_inds = self._get_indexer(key)

if not band_key or any(start == stop for (start, stop) in window):
Expand All @@ -114,6 +117,8 @@ def _getitem(self, key):
else:
with self.lock:
riods = self.manager.acquire(needs_lock=False)
if self.vrt_params is not None:
riods = WarpedVRT(riods, **self.vrt_params)
out = riods.read(band_key, window=window)

if squeeze_axis:
Expand Down Expand Up @@ -178,8 +183,8 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None,
Parameters
----------
filename : str
Path to the file to open.
filename : str, rasterio.DatasetReader, or rasterio.WarpedVRT
Path to the file to open. Or already open rasterio dataset.
parse_coordinates : bool, optional
Whether to parse the x and y coordinates out of the file's
``transform`` attribute or not. The default is to automatically
Expand All @@ -206,14 +211,28 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None,
data : DataArray
The newly created DataArray.
"""

import rasterio
from rasterio.vrt import WarpedVRT
vrt_params = None
if isinstance(filename, rasterio.io.DatasetReader):
filename = filename.name
elif isinstance(filename, rasterio.vrt.WarpedVRT):
vrt = filename
filename = vrt.src_dataset.name
vrt_params = dict(crs=vrt.crs.to_string(),
resampling=vrt.resampling,
src_nodata=vrt.src_nodata,
dst_nodata=vrt.dst_nodata,
tolerance=vrt.tolerance,
warp_extras=vrt.warp_extras)

if lock is None:
lock = RASTERIO_LOCK

manager = CachingFileManager(rasterio.open, filename, lock=lock, mode='r')
riods = manager.acquire()
if vrt_params is not None:
riods = WarpedVRT(riods, **vrt_params)

if cache is None:
cache = chunks is None
Expand Down Expand Up @@ -287,14 +306,14 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None,
for k, v in meta.items():
# Add values as coordinates if they match the band count,
# as attributes otherwise
if (isinstance(v, (list, np.ndarray)) and
len(v) == riods.count):
if (isinstance(v, (list, np.ndarray))
and len(v) == riods.count):
coords[k] = ('band', np.asarray(v))
else:
attrs[k] = v

data = indexing.LazilyOuterIndexedArray(
RasterioArrayWrapper(manager, lock))
RasterioArrayWrapper(manager, lock, vrt_params))

# this lets you write arrays loaded with rasterio
data = indexing.CopyOnWriteArray(data)
Expand Down
34 changes: 33 additions & 1 deletion xarray/conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,11 +320,39 @@ def decode_cf_variable(name, var, concat_characters=True, mask_and_scale=True,
return Variable(dimensions, data, attributes, encoding=encoding)


def _update_bounds_attributes(variables):
"""Adds time attributes to time bounds variables.
Variables handling time bounds ("Cell boundaries" in the CF
conventions) do not necessarily carry the necessary attributes to be
decoded. This copies the attributes from the time variable to the
associated boundaries.
See Also:
http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/
cf-conventions.html#cell-boundaries
https://github.com/pydata/xarray/issues/2565
"""

# For all time variables with bounds
for v in variables.values():
attrs = v.attrs
has_date_units = 'units' in attrs and 'since' in attrs['units']
if has_date_units and 'bounds' in attrs:
if attrs['bounds'] in variables:
bounds_attrs = variables[attrs['bounds']].attrs
bounds_attrs.setdefault('units', attrs['units'])
if 'calendar' in attrs:
bounds_attrs.setdefault('calendar', attrs['calendar'])


def decode_cf_variables(variables, attributes, concat_characters=True,
mask_and_scale=True, decode_times=True,
decode_coords=True, drop_variables=None):
"""
Decode a several CF encoded variables.
Decode several CF encoded variables.
See: decode_cf_variable
"""
Expand All @@ -350,6 +378,10 @@ def stackable(dim):
drop_variables = []
drop_variables = set(drop_variables)

# Time bounds coordinates might miss the decoding attributes
if decode_times:
_update_bounds_attributes(variables)

new_vars = OrderedDict()
for k, v in iteritems(variables):
if k in drop_variables:
Expand Down
9 changes: 7 additions & 2 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ def rolling(self, dim=None, min_periods=None, center=False, **dim_kwargs):
center=center)

def resample(self, indexer=None, skipna=None, closed=None, label=None,
base=0, keep_attrs=None, **indexer_kwargs):
base=0, keep_attrs=None, loffset=None, **indexer_kwargs):
"""Returns a Resample object for performing resampling operations.
Handles both downsampling and upsampling. If any intervals contain no
Expand All @@ -612,6 +612,9 @@ def resample(self, indexer=None, skipna=None, closed=None, label=None,
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for '24H' frequency, base could
range from 0 through 23.
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.
keep_attrs : bool, optional
If True, the object's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
Expand Down Expand Up @@ -700,7 +703,9 @@ def resample(self, indexer=None, skipna=None, closed=None, label=None,

group = DataArray(dim_coord, coords=dim_coord.coords,
dims=dim_coord.dims, name=RESAMPLE_DIM)
grouper = pd.Grouper(freq=freq, closed=closed, label=label, base=base)
# TODO: to_offset() call required for pandas==0.19.2
grouper = pd.Grouper(freq=freq, closed=closed, label=label, base=base,
loffset=pd.tseries.frequencies.to_offset(loffset))
resampler = self._resample_cls(self, group=group, dim=dim_name,
grouper=grouper,
resample_dim=RESAMPLE_DIM)
Expand Down
28 changes: 28 additions & 0 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import functools
import warnings

import datetime
import numpy as np
import pandas as pd

Expand Down Expand Up @@ -154,6 +155,32 @@ def _unique_and_monotonic(group):
return index.is_unique and index.is_monotonic


def _apply_loffset(grouper, result):
"""
(copied from pandas)
if loffset is set, offset the result index
This is NOT an idempotent routine, it will be applied
exactly once to the result.
Parameters
----------
result : Series or DataFrame
the result of resample
"""

needs_offset = (
isinstance(grouper.loffset, (pd.DateOffset, datetime.timedelta))
and isinstance(result.index, pd.DatetimeIndex)
and len(result.index) > 0
)

if needs_offset:
result.index = result.index + grouper.loffset

grouper.loffset = None


class GroupBy(SupportsArithmetic):
"""A object that implements the split-apply-combine pattern.
Expand Down Expand Up @@ -235,6 +262,7 @@ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None,
raise ValueError('index must be monotonic for resampling')
s = pd.Series(np.arange(index.size), index)
first_items = s.groupby(grouper).first()
_apply_loffset(grouper, first_items)
full_index = first_items.index
if first_items.isnull().any():
first_items = first_items.dropna()
Expand Down
16 changes: 5 additions & 11 deletions xarray/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1142,15 +1142,6 @@ def __init__(self, array):
'Trying to wrap {}'.format(type(array)))
self.array = array

def _ensure_ndarray(self, value):
# We always want the result of indexing to be a NumPy array. If it's
# not, then it really should be a 0d array. Doing the coercion here
# instead of inside variable.as_compatible_data makes it less error
# prone.
if not isinstance(value, np.ndarray):
value = utils.to_0d_array(value)
return value

def _indexing_array_and_key(self, key):
if isinstance(key, OuterIndexer):
array = self.array
Expand All @@ -1160,7 +1151,10 @@ def _indexing_array_and_key(self, key):
key = key.tuple
elif isinstance(key, BasicIndexer):
array = self.array
key = key.tuple
# We want 0d slices rather than scalars. This is achieved by
# appending an ellipsis (see
# https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#detailed-notes). # noqa
key = key.tuple + (Ellipsis,)
else:
raise TypeError('unexpected key type: {}'.format(type(key)))

Expand All @@ -1171,7 +1165,7 @@ def transpose(self, order):

def __getitem__(self, key):
array, key = self._indexing_array_and_key(key)
return self._ensure_ndarray(array[key])
return array[key]

def __setitem__(self, key, value):
array, key = self._indexing_array_and_key(key)
Expand Down
Loading

0 comments on commit b2bb60e

Please sign in to comment.