Skip to content

Commit

Permalink
Merge branch 'master' into file-manager-del
Browse files Browse the repository at this point in the history
  • Loading branch information
shoyer committed Dec 23, 2018
2 parents fd12b18 + 9352b3c commit b2bb60e
Show file tree
Hide file tree
Showing 16 changed files with 312 additions and 53 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ matrix:
- env: CONDA_ENV=py36-bottleneck-dev
- env: CONDA_ENV=py36-condaforge-rc
- env: CONDA_ENV=py36-pynio-dev
- env: CONDA_ENV=py36-rasterio-0.36
- env: CONDA_ENV=py36-rasterio
- env: CONDA_ENV=py36-zarr-dev
- env: CONDA_ENV=docs
- env: CONDA_ENV=py36-hypothesis
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ dependencies:
- scipy
- seaborn
- toolz
- rasterio=0.36.0
- rasterio>=1.0
- bottleneck
- pip:
- coveralls
Expand Down
2 changes: 1 addition & 1 deletion doc/installing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ For netCDF and IO
for accessing CAMx, GEOS-Chem (bpch), NOAA ARL files, ICARTT files
(ffi1001) and many others.
- `rasterio <https://github.com/mapbox/rasterio>`__: for reading GeoTiffs and
other gridded raster datasets.
other gridded raster datasets. (version 1.0 or later)
- `iris <https://github.com/scitools/iris>`__: for conversion to and from iris'
Cube objects
- `cfgrib <https://github.com/ecmwf/cfgrib>`__: for reading GRIB files via the
Expand Down
19 changes: 19 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@ v0.11.1 (unreleased)
Breaking changes
~~~~~~~~~~~~~~~~

- Minimum rasterio version increased from 0.36 to 1.0 (for ``open_rasterio``)
- Time bounds variables are now also decoded according to CF conventions
(:issue:`2565`). The previous behavior was to decode them only if they
had specific time attributes, now these attributes are copied
automatically from the corresponding time coordinate. This might
break downstream code that was relying on these variables not
being decoded.
By `Fabien Maussion <https://github.com/fmaussion>`_.

Enhancements
~~~~~~~~~~~~

Expand All @@ -41,6 +50,10 @@ Enhancements
- :py:class:`CFTimeIndex` uses slicing for string indexing when possible (like
:py:class:`pandas.DatetimeIndex`), which avoids unnecessary copies.
By `Stephan Hoyer <https://github.com/shoyer>`_
- Enable passing ``rasterio.io.DatasetReader`` or ``rasterio.vrt.WarpedVRT`` to
``open_rasterio`` instead of file path string. Allows for in-memory
reprojection, see (:issue:`2588`).
By `Scott Henderson <https://github.com/scottyhq>`_.
- Like :py:class:`pandas.DatetimeIndex`, :py:class:`CFTimeIndex` now supports
"dayofyear" and "dayofweek" accessors (:issue:`2597`). By `Spencer Clark
<https://github.com/spencerkclark>`_.
Expand All @@ -50,6 +63,12 @@ Enhancements
recommend enabling it in your test suites if you use xarray for IO.
By `Stephan Hoyer <https://github.com/shoyer>`_
- Support Dask ``HighLevelGraphs`` by `Matthew Rocklin <https://matthewrocklin.com>`_.
- :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` now supports the
``loffset`` kwarg just like Pandas.
By `Deepak Cherian <https://github.com/dcherian>`_
- 0d slices of ndarrays are now obtained directly through indexing, rather than
extracting and wrapping a scalar, avoiding unnecessary copying. By `Daniel
Wennberg <https://github.com/danielwe>`_.

Bug fixes
~~~~~~~~~
Expand Down
37 changes: 28 additions & 9 deletions xarray/backends/rasterio_.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import warnings
from collections import OrderedDict
from distutils.version import LooseVersion

import numpy as np

from .. import DataArray
Expand All @@ -24,13 +23,16 @@
class RasterioArrayWrapper(BackendArray):
"""A wrapper around rasterio dataset objects"""

def __init__(self, manager, lock):
def __init__(self, manager, lock, vrt_params=None):
from rasterio.vrt import WarpedVRT
self.manager = manager
self.lock = lock

# cannot save riods as an attribute: this would break pickleability
riods = manager.acquire()

if vrt_params is not None:
riods = WarpedVRT(riods, **vrt_params)
self.vrt_params = vrt_params
self._shape = (riods.count, riods.height, riods.width)

dtypes = riods.dtypes
Expand Down Expand Up @@ -104,6 +106,7 @@ def _get_indexer(self, key):
return band_key, tuple(window), tuple(squeeze_axis), tuple(np_inds)

def _getitem(self, key):
from rasterio.vrt import WarpedVRT
band_key, window, squeeze_axis, np_inds = self._get_indexer(key)

if not band_key or any(start == stop for (start, stop) in window):
Expand All @@ -114,6 +117,8 @@ def _getitem(self, key):
else:
with self.lock:
riods = self.manager.acquire(needs_lock=False)
if self.vrt_params is not None:
riods = WarpedVRT(riods, **self.vrt_params)
out = riods.read(band_key, window=window)

if squeeze_axis:
Expand Down Expand Up @@ -178,8 +183,8 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None,
Parameters
----------
filename : str
Path to the file to open.
filename : str, rasterio.DatasetReader, or rasterio.WarpedVRT
Path to the file to open. Or already open rasterio dataset.
parse_coordinates : bool, optional
Whether to parse the x and y coordinates out of the file's
``transform`` attribute or not. The default is to automatically
Expand All @@ -206,14 +211,28 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None,
data : DataArray
The newly created DataArray.
"""

import rasterio
from rasterio.vrt import WarpedVRT
vrt_params = None
if isinstance(filename, rasterio.io.DatasetReader):
filename = filename.name
elif isinstance(filename, rasterio.vrt.WarpedVRT):
vrt = filename
filename = vrt.src_dataset.name
vrt_params = dict(crs=vrt.crs.to_string(),
resampling=vrt.resampling,
src_nodata=vrt.src_nodata,
dst_nodata=vrt.dst_nodata,
tolerance=vrt.tolerance,
warp_extras=vrt.warp_extras)

if lock is None:
lock = RASTERIO_LOCK

manager = CachingFileManager(rasterio.open, filename, lock=lock, mode='r')
riods = manager.acquire()
if vrt_params is not None:
riods = WarpedVRT(riods, **vrt_params)

if cache is None:
cache = chunks is None
Expand Down Expand Up @@ -287,14 +306,14 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None,
for k, v in meta.items():
# Add values as coordinates if they match the band count,
# as attributes otherwise
if (isinstance(v, (list, np.ndarray)) and
len(v) == riods.count):
if (isinstance(v, (list, np.ndarray))
and len(v) == riods.count):
coords[k] = ('band', np.asarray(v))
else:
attrs[k] = v

data = indexing.LazilyOuterIndexedArray(
RasterioArrayWrapper(manager, lock))
RasterioArrayWrapper(manager, lock, vrt_params))

# this lets you write arrays loaded with rasterio
data = indexing.CopyOnWriteArray(data)
Expand Down
34 changes: 33 additions & 1 deletion xarray/conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,11 +320,39 @@ def decode_cf_variable(name, var, concat_characters=True, mask_and_scale=True,
return Variable(dimensions, data, attributes, encoding=encoding)


def _update_bounds_attributes(variables):
"""Adds time attributes to time bounds variables.
Variables handling time bounds ("Cell boundaries" in the CF
conventions) do not necessarily carry the necessary attributes to be
decoded. This copies the attributes from the time variable to the
associated boundaries.
See Also:
http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/
cf-conventions.html#cell-boundaries
https://github.com/pydata/xarray/issues/2565
"""

# For all time variables with bounds
for v in variables.values():
attrs = v.attrs
has_date_units = 'units' in attrs and 'since' in attrs['units']
if has_date_units and 'bounds' in attrs:
if attrs['bounds'] in variables:
bounds_attrs = variables[attrs['bounds']].attrs
bounds_attrs.setdefault('units', attrs['units'])
if 'calendar' in attrs:
bounds_attrs.setdefault('calendar', attrs['calendar'])


def decode_cf_variables(variables, attributes, concat_characters=True,
mask_and_scale=True, decode_times=True,
decode_coords=True, drop_variables=None):
"""
Decode a several CF encoded variables.
Decode several CF encoded variables.
See: decode_cf_variable
"""
Expand All @@ -350,6 +378,10 @@ def stackable(dim):
drop_variables = []
drop_variables = set(drop_variables)

# Time bounds coordinates might miss the decoding attributes
if decode_times:
_update_bounds_attributes(variables)

new_vars = OrderedDict()
for k, v in iteritems(variables):
if k in drop_variables:
Expand Down
9 changes: 7 additions & 2 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ def rolling(self, dim=None, min_periods=None, center=False, **dim_kwargs):
center=center)

def resample(self, indexer=None, skipna=None, closed=None, label=None,
base=0, keep_attrs=None, **indexer_kwargs):
base=0, keep_attrs=None, loffset=None, **indexer_kwargs):
"""Returns a Resample object for performing resampling operations.
Handles both downsampling and upsampling. If any intervals contain no
Expand All @@ -612,6 +612,9 @@ def resample(self, indexer=None, skipna=None, closed=None, label=None,
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for '24H' frequency, base could
range from 0 through 23.
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.
keep_attrs : bool, optional
If True, the object's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
Expand Down Expand Up @@ -700,7 +703,9 @@ def resample(self, indexer=None, skipna=None, closed=None, label=None,

group = DataArray(dim_coord, coords=dim_coord.coords,
dims=dim_coord.dims, name=RESAMPLE_DIM)
grouper = pd.Grouper(freq=freq, closed=closed, label=label, base=base)
# TODO: to_offset() call required for pandas==0.19.2
grouper = pd.Grouper(freq=freq, closed=closed, label=label, base=base,
loffset=pd.tseries.frequencies.to_offset(loffset))
resampler = self._resample_cls(self, group=group, dim=dim_name,
grouper=grouper,
resample_dim=RESAMPLE_DIM)
Expand Down
28 changes: 28 additions & 0 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import functools
import warnings

import datetime
import numpy as np
import pandas as pd

Expand Down Expand Up @@ -154,6 +155,32 @@ def _unique_and_monotonic(group):
return index.is_unique and index.is_monotonic


def _apply_loffset(grouper, result):
"""
(copied from pandas)
if loffset is set, offset the result index
This is NOT an idempotent routine, it will be applied
exactly once to the result.
Parameters
----------
result : Series or DataFrame
the result of resample
"""

needs_offset = (
isinstance(grouper.loffset, (pd.DateOffset, datetime.timedelta))
and isinstance(result.index, pd.DatetimeIndex)
and len(result.index) > 0
)

if needs_offset:
result.index = result.index + grouper.loffset

grouper.loffset = None


class GroupBy(SupportsArithmetic):
"""A object that implements the split-apply-combine pattern.
Expand Down Expand Up @@ -235,6 +262,7 @@ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None,
raise ValueError('index must be monotonic for resampling')
s = pd.Series(np.arange(index.size), index)
first_items = s.groupby(grouper).first()
_apply_loffset(grouper, first_items)
full_index = first_items.index
if first_items.isnull().any():
first_items = first_items.dropna()
Expand Down
16 changes: 5 additions & 11 deletions xarray/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1142,15 +1142,6 @@ def __init__(self, array):
'Trying to wrap {}'.format(type(array)))
self.array = array

def _ensure_ndarray(self, value):
# We always want the result of indexing to be a NumPy array. If it's
# not, then it really should be a 0d array. Doing the coercion here
# instead of inside variable.as_compatible_data makes it less error
# prone.
if not isinstance(value, np.ndarray):
value = utils.to_0d_array(value)
return value

def _indexing_array_and_key(self, key):
if isinstance(key, OuterIndexer):
array = self.array
Expand All @@ -1160,7 +1151,10 @@ def _indexing_array_and_key(self, key):
key = key.tuple
elif isinstance(key, BasicIndexer):
array = self.array
key = key.tuple
# We want 0d slices rather than scalars. This is achieved by
# appending an ellipsis (see
# https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#detailed-notes). # noqa
key = key.tuple + (Ellipsis,)
else:
raise TypeError('unexpected key type: {}'.format(type(key)))

Expand All @@ -1171,7 +1165,7 @@ def transpose(self, order):

def __getitem__(self, key):
array, key = self._indexing_array_and_key(key)
return self._ensure_ndarray(array[key])
return array[key]

def __setitem__(self, key, value):
array, key = self._indexing_array_and_key(key)
Expand Down
Loading

0 comments on commit b2bb60e

Please sign in to comment.