-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature/rasterio #1070
Feature/rasterio #1070
Changes from all commits
b6080e1
c6fb2b5
abeff81
be6094e
9855d32
2a94fa4
dd71a45
1db0533
e094bec
21f4343
b11afaa
af902f0
0369ef8
bb73429
7037e9d
3e90b99
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,13 @@ | ||
name: test_env | ||
channels: | ||
- conda-forge | ||
dependencies: | ||
- python=3.4 | ||
- bottleneck | ||
- pytest | ||
- pandas | ||
- rasterio | ||
- scipy | ||
- pip: | ||
- coveralls | ||
- pytest-cov |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
[wheel] | ||
universal = 1 | ||
|
||
[pytest] | ||
[tool:pytest] | ||
python_files=test_*.py |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
import numpy as np | ||
|
||
try: | ||
import rasterio | ||
except ImportError: | ||
rasterio = False | ||
|
||
from .. import Variable, DataArray | ||
from ..core.utils import FrozenOrderedDict, Frozen, NDArrayMixin | ||
from ..core import indexing | ||
from ..core.pycompat import OrderedDict, suppress | ||
|
||
from .common import AbstractDataStore | ||
|
||
__rio_varname__ = 'raster' | ||
|
||
|
||
class RasterioArrayWrapper(NDArrayMixin): | ||
def __init__(self, ds): | ||
self._ds = ds | ||
self.array = ds.read() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unless the behavior of rasterio changed with v1.0, this loads the data in memory. I might be wrong, but I think the call to I also wonder if the call to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @shoyer, do we need to maintain the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't need to set There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you can basically drop this in replacing the current class RasterioArrayWrapper(NdimSizeLenMixin):
"""Mixin class RasterIO datasets for making wrappers of N-dimensional
arrays that conform to the ndarray interface required for the data
argument to Variable objects.
A subclass should set the `array` property and override one or more of
`dtype`, `shape` and `__getitem__`.
"""
def __init__(self, ds):
self.ds = ds
@property
def dtype(self):
if len(set(self.ds.dtypes[0])) != 1:
raise ValueError(
'Can only handle Rasterio dataset with all bands having the same type')
return np.dtype(self.ds.dtypes[0])
@property
def shape(self):
return self.ds.shape
def __array__(self, dtype=None):
'''Not sure if this will work as is'''
return np.asarray(self[...], dtype=dtype)
def __getitem__(self, key):
band = range(self.shape[0])[key[0]]
window = []
for win in key[1:]:
if isinstance(win, slice):
window.append((win.start, win.stop))
elif isinstance(win, int):
window.append((win, win + 1))
else: # integer ndarray
window.append((win.min(), win.max()))
raw_data = self.ds.read(band, window=window)
# now, fix up raw_data to conform to numpy indexing conventions
# - drop axes for integer band/windows
# - stride if windows are slices with win.step != 1
# - subset if window is an integer ndarray
def __repr__(self):
return '%s(array=%r)' % (type(self).__name__, self.ds) I'm not sure the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's also worth considering how/if you want to handle automatically masking missing values -- it looks like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to confirm, the above code is waiting for the decision from the rasterio group? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would suggest getting something working here, with an eye toward keeping it self-contained. Later, we can try to port it upstream to rasterio. |
||
|
||
@property | ||
def dtype(self): | ||
return np.dtype(self._ds.dtypes[0]) | ||
|
||
def __getitem__(self, key): | ||
if key == () and self.ndim == 0: | ||
return self.array.get_value() | ||
return self.array[key] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Based on what @fmaussion said above, I think this should be something like: def __getitem__(self, key):
if key == () and self.ndim == 0:
return self._ds.read()
return self._ds.read(band, window=window) Where There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. By the time indexers get here, they should have already passed through canonicalize_indexer, which means you only need to handle a Based on the docstring for read, we want something like: def __getitem__(self, key):
band = range(self.shape[0])[key[0]]
window = []
for win in key[1:]:
if isinstance(win, slice):
window.append((win.start, win.stop))
elif isinstance(win, int):
window.append((win, win + 1))
else: # integer ndarray
window.append((win.min(), win.max()))
raw_data = self._ds.read(band, window=window)
# now, fix up raw_data to conform to numpy indexing conventions
# - drop axes for integer band/windows
# - stride if windows are slices with win.step != 1
# - subset if window is an integer ndarray Honestly, this logic should probably live in rasterio if possible. I'm a little surprised that they have never implemented a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See also: https://gist.github.com/lpinner/bd57b54a5c6903e4a6a2 (can't reuse this directly, though, because it doesn't have a license). Anyways, I would definitely see if the rasterio folks are up for implementing a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See also rasterio/rasterio#920 for related discussion. |
||
|
||
|
||
class RasterioDataStore(AbstractDataStore):
    """Store for accessing datasets via Rasterio.

    The opened file is exposed as a single variable named by the
    module-level ``__rio_varname__`` with dimensions ``('band', 'y', 'x')``.
    """

    def __init__(self, filename, mode='r'):
        """Open ``filename`` with rasterio and collect coords and attributes.

        Parameters
        ----------
        filename
            Forwarded unchanged to ``rasterio.open``.
        mode : str, optional
            Forwarded to ``rasterio.open`` as ``mode`` (default ``'r'``).

        Raises
        ------
        ValueError
            If the opened dataset reports fewer than one band.
        """
        with rasterio.Env():
            self.ds = rasterio.open(filename, mode=mode)

        # Get coords
        nx, ny = self.ds.width, self.ds.height
        x0, y0 = self.ds.bounds.left, self.ds.bounds.top
        # y decreases from the top bound, hence the negated resolution.
        dx, dy = self.ds.res[0], -self.ds.res[1]

        # NOTE(review): coordinates start exactly at the left/top bounds;
        # presumably these are pixel edges rather than centres -- confirm.
        self.coords = {
            'y': np.linspace(start=y0, num=ny, stop=(y0 + (ny - 1) * dy)),
            'x': np.linspace(start=x0, num=nx, stop=(x0 + (nx - 1) * dx)),
        }

        # Get dims
        if self.ds.count >= 1:
            self.dims = ('band', 'y', 'x')
            self.coords['band'] = self.ds.indexes
        else:
            # Do not leave the file handle open when construction fails.
            self.ds.close()
            raise ValueError('unknown dims')

        self._attrs = OrderedDict()
        for attr_name in ['crs', 'transform', 'proj']:
            # Suppress per attribute so that one missing attribute does not
            # prevent the remaining ones from being recorded.
            with suppress(AttributeError):
                self._attrs[attr_name] = getattr(self.ds, attr_name)

    def open_store_variable(self, var):
        """Return the lazily indexed ``Variable`` for the raster data.

        Raises
        ------
        ValueError
            If ``var`` is not ``__rio_varname__``.
        """
        if var != __rio_varname__:
            raise ValueError(
                'Rasterio variables are all named %s' % __rio_varname__)
        data = indexing.LazilyIndexedArray(
            RasterioArrayWrapper(self.ds))
        return Variable(self.dims, data, self._attrs)

    def get_variables(self):
        """Return a frozen mapping of the raster variable plus any
        coordinates produced by ``_try_to_get_latlon_coords``."""
        # Get lat lon coordinates
        coords = _try_to_get_latlon_coords(self.coords, self._attrs)
        rio_vars = {__rio_varname__: self.open_store_variable(__rio_varname__)}
        rio_vars.update(coords)
        return FrozenOrderedDict(rio_vars)

    def get_attrs(self):
        """Return a read-only view of the attributes collected on open."""
        return Frozen(self._attrs)

    def get_dimensions(self):
        """Return a read-only mapping from dimension name to its size.

        The sizes are taken from the dataset's ``count``, ``height`` and
        ``width``, in the order given by ``self.dims``.
        """
        sizes = (self.ds.count, self.ds.height, self.ds.width)
        return Frozen(OrderedDict(zip(self.dims, sizes)))

    def close(self):
        """Close the underlying rasterio dataset."""
        self.ds.close()
|
||
|
||
def _transform_proj(p1, p2, x, y, nocopy=False): | ||
"""Wrapper around the pyproj transform. | ||
When two projections are equal, this function avoids quite a bunch of | ||
useless calculations. See https://github.com/jswhit/pyproj/issues/15 | ||
""" | ||
import pyproj | ||
import copy | ||
|
||
if p1.srs == p2.srs: | ||
if nocopy: | ||
return x, y | ||
else: | ||
return copy.deepcopy(x), copy.deepcopy(y) | ||
|
||
return pyproj.transform(p1, p2, x, y) | ||
|
||
|
||
def _try_to_get_latlon_coords(coords, attrs): | ||
coords_out = {} | ||
try: | ||
import pyproj | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm a little surprised rasterio doesn't have projections built in. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. They do: https://mapbox.github.io/rasterio/topics/reproject.html I will take a stab at doing it the rasterio way. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's a reprojection, which does seem useful, but it seems separate from giving you |
||
except ImportError: | ||
pyproj = False | ||
if 'crs' in attrs and pyproj: | ||
proj = pyproj.Proj(attrs['crs']) | ||
x, y = np.meshgrid(coords['x'], coords['y']) | ||
proj_out = pyproj.Proj("+init=EPSG:4326", preserve_units=True) | ||
xc, yc = _transform_proj(proj, proj_out, x, y) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You might try to do this calculation lazily, e.g., by making a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry I don't understand how to do this. Is there an example I can go off of? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We have some examples of making NDArrayMixin subclasses in conventions.py, e.g., There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Another option would be to use |
||
dims = ('y', 'x') | ||
|
||
coords_out['lat'] = Variable(dims,yc,attrs={'units': 'degrees_north', 'long_name': 'latitude', | ||
'standard_name': 'latitude'}) | ||
coords_out['lon'] = Variable(dims,xc,attrs={'units': 'degrees_east', 'long_name': 'longitude', | ||
'standard_name': 'longitude'}) | ||
return coords_out |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,7 +21,7 @@ | |
|
||
from . import (TestCase, requires_scipy, requires_netCDF4, requires_pydap, | ||
requires_scipy_or_netCDF4, requires_dask, requires_h5netcdf, | ||
requires_pynio, has_netCDF4, has_scipy) | ||
requires_pynio, requires_rasterio, has_netCDF4, has_scipy) | ||
from .test_dataset import create_test_data | ||
|
||
try: | ||
|
@@ -1063,6 +1063,35 @@ def test_weakrefs(self): | |
self.assertDatasetIdentical(actual, expected) | ||
|
||
|
||
@requires_rasterio
class TestRasterIO(CFEncodedDataTest, Only32BitTypes, TestCase):
    """Backend tests that read files through the ``'rasterio'`` engine.

    Test data is written with the scipy netCDF engine and read back with
    ``engine='rasterio'``.
    """

    def test_write_store(self):
        # rasterio is read-only for now
        pass

    def test_orthogonal_indexing(self):
        # rasterio also does not support list-like indexing
        pass

    @contextlib.contextmanager
    def roundtrip(self, data, save_kwargs=None, open_kwargs=None):
        """Write ``data`` to a temporary file and yield it reopened.

        ``save_kwargs`` are forwarded to ``to_netcdf`` and ``open_kwargs``
        to ``open_dataset``; both default to no extra arguments.
        """
        # None sentinels avoid sharing one mutable dict across calls.
        save_kwargs = {} if save_kwargs is None else save_kwargs
        open_kwargs = {} if open_kwargs is None else open_kwargs
        with create_tmp_file() as tmp_file:
            data.to_netcdf(tmp_file, engine='scipy', **save_kwargs)
            with open_dataset(tmp_file, engine='rasterio',
                              **open_kwargs) as ds:
                yield ds

    def test_weakrefs(self):
        example = Dataset({'foo': ('x', np.arange(5.0))})
        expected = example.rename({'foo': 'bar', 'x': 'y'})

        with create_tmp_file() as tmp_file:
            example.to_netcdf(tmp_file, engine='scipy')
            on_disk = open_dataset(tmp_file, engine='rasterio')
            actual = on_disk.rename({'foo': 'bar', 'x': 'y'})
            del on_disk  # trigger garbage collection
            self.assertDatasetIdentical(actual, expected)
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @NicWayand - take a look at the PyNio tests. They are probably the closest analog for what we need to test here. |
||
|
||
|
||
class TestEncodingInvalid(TestCase): | ||
|
||
def test_extract_nc4_encoding(self): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This change can actually be permanent -- it's valuable for tracking down segfaults on Travis more broadly.