From 6a956104cd9fc6b2c2fc8f07a7006502e23c95f5 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Sun, 10 Nov 2019 17:50:08 +0100 Subject: [PATCH 1/5] modules xcube.core.geom and xcube.core.verify no longer depend on 'lon' and 'lat' coords --- test/core/test_schema.py | 50 ++++++++++-- test/core/test_verify.py | 16 ++-- xcube/core/geom.py | 157 ++++++++++++++++++++++--------------- xcube/core/new.py | 2 + xcube/core/schema.py | 165 ++++++++++++++++++++++++++++++++++++--- xcube/core/verify.py | 82 ++++++++++--------- 6 files changed, 344 insertions(+), 128 deletions(-) diff --git a/test/core/test_schema.py b/test/core/test_schema.py index f3636385c..e6f774db5 100644 --- a/test/core/test_schema.py +++ b/test/core/test_schema.py @@ -36,7 +36,15 @@ def _assert_schema(self, schema: CubeSchema, expected_shape=None, expected_chunk self.assertIn('lon', schema.coords) self.assertEqual(('time', 'lat', 'lon'), schema.dims) self.assertEqual('time', schema.time_dim) - self.assertEqual(('lon', 'lat'), schema.spatial_dims) + self.assertEqual('lon', schema.x_name) + self.assertEqual('lat', schema.y_name) + self.assertEqual('time', schema.time_name) + self.assertEqual('lon', schema.x_dim) + self.assertEqual('lat', schema.y_dim) + self.assertEqual('time', schema.time_dim) + self.assertEqual(expected_shape[-1], schema.x_var.size) + self.assertEqual(expected_shape[-2], schema.y_var.size) + self.assertEqual(expected_shape[0], schema.time_var.size) def test_repr_html(self): cube = new_cube(variables=dict(a=2, b=3, c=4)) @@ -64,6 +72,24 @@ def test_constructor_with_invalid_args(self): self.assertEqual('coords must be a mapping from dimension names to label arrays', f'{cm.exception}') + with self.assertRaises(ValueError) as cm: + # noinspection PyTypeChecker + CubeSchema(schema.shape, cube.coords, x_name=None) + self.assertEqual('x_name must be given', + f'{cm.exception}') + + with self.assertRaises(ValueError) as cm: + # noinspection PyTypeChecker + CubeSchema(schema.shape, cube.coords, 
y_name=None) + self.assertEqual('y_name must be given', + f'{cm.exception}') + + with self.assertRaises(ValueError) as cm: + # noinspection PyTypeChecker + CubeSchema(schema.shape, cube.coords, time_name=None) + self.assertEqual('time_name must be given', + f'{cm.exception}') + with self.assertRaises(ValueError) as cm: CubeSchema(schema.shape[1:], schema.coords) self.assertEqual('shape must have at least three dimensions', @@ -76,12 +102,12 @@ def test_constructor_with_invalid_args(self): with self.assertRaises(ValueError) as cm: CubeSchema(schema.shape, schema.coords, dims=('lat', 'lon', 'time')) - self.assertEqual("the first name in dims must be 'time'", + self.assertEqual("the first dimension in dims must be 'time'", f'{cm.exception}') with self.assertRaises(ValueError) as cm: CubeSchema(schema.shape, schema.coords, dims=('time', 'lon', 'lat')) - self.assertEqual("the last two names in dims must be either ('lat', 'lon') or ('y', 'x')", + self.assertEqual("the last two dimensions in dims must be 'lat' and 'lon'", f'{cm.exception}') with self.assertRaises(ValueError) as cm: @@ -93,7 +119,7 @@ def test_constructor_with_invalid_args(self): coords = dict(schema.coords) del coords['lat'] CubeSchema(schema.shape, coords, dims=schema.dims, chunks=(1, 90, 90)) - self.assertEqual("missing dimension 'lat' in coords", + self.assertEqual("missing variables 'lon', 'lat', 'time' in coords", f'{cm.exception}') with self.assertRaises(ValueError) as cm: @@ -101,7 +127,7 @@ def test_constructor_with_invalid_args(self): lat = coords['lat'] coords['lat'] = xr.DataArray(lat.values.reshape((1, len(lat))), dims=('b', lat.dims[0]), attrs=lat.attrs) CubeSchema(schema.shape, coords, dims=schema.dims, chunks=(1, 90, 90)) - self.assertEqual("labels of 'lat' in coords must be one-dimensional", + self.assertEqual("variables 'lon', 'lat', 'time' in coords must be 1-D", f'{cm.exception}') with self.assertRaises(ValueError) as cm: @@ -119,6 +145,20 @@ def test_new_with_cube(self): 
self.assertEqual("cube is empty", f'{cm.exception}') + cube = new_cube() + del cube.coords['lon'] + with self.assertRaises(ValueError) as cm: + CubeSchema.new(cube) + self.assertEqual("cube has no valid spatial coordinate variables", + f'{cm.exception}') + + cube = new_cube() + del cube.coords['time'] + with self.assertRaises(ValueError) as cm: + CubeSchema.new(cube) + self.assertEqual("cube has no valid time coordinate variable", + f'{cm.exception}') + cube = new_cube(variables=dict(a=1, b=2)) cube['c'] = xr.DataArray(np.array([1, 2, 3, 4, 5]), dims=('q',)) with self.assertRaises(ValueError) as cm: diff --git a/test/core/test_verify.py b/test/core/test_verify.py index 00b33ca8b..a8c59c9bb 100644 --- a/test/core/test_verify.py +++ b/test/core/test_verify.py @@ -26,8 +26,7 @@ def test_assert_cube_illegal_coord_var(self): with self.assertRaises(ValueError) as cm: assert_cube(cube) self.assertEqual("Dataset is not a valid xcube dataset, because:\n" - "- coordinate variable 'lat' must have a single dimension 'lat';\n" - "- coordinate variable 'lon' must have a single dimension 'lon'.", + "- missing spatial x,y coordinate variables.", f"{cm.exception}") def test_assert_cube_illegal_coord_bounds_var(self): @@ -41,12 +40,11 @@ def test_assert_cube_illegal_coord_bounds_var(self): with self.assertRaises(ValueError) as cm: assert_cube(cube) self.assertEqual("Dataset is not a valid xcube dataset, because:\n" - "- bounds coordinate variable 'lat_bnds' must have dimensions ('lat', );\n" - "- shape of bounds coordinate variable 'lat_bnds' must be (180, 2) but was (5, 180, 2);\n" "- bounds coordinate variable 'lon_bnds' must have dimensions ('lon', );\n" "- shape of bounds coordinate variable 'lon_bnds' must be (360, 2) but was (5, 360, 2);\n" - "- type of bounds coordinate variable 'lon_bnds' must be dtype('float64')" - " but was dtype('float16').", + "- type of bounds coordinate variable 'lon_bnds' must be dtype('float64') but was dtype('float16');\n" + "- bounds coordinate 
variable 'lat_bnds' must have dimensions ('lat', );\n" + "- shape of bounds coordinate variable 'lat_bnds' must be (180, 2) but was (5, 180, 2).", f"{cm.exception}") def test_assert_cube_illegal_data_var(self): @@ -69,7 +67,7 @@ def test_verify_cube(self): cube = new_cube() self.assertEqual([], verify_cube(cube)) ds = cube.drop("time") - self.assertEqual(["missing coordinate variable 'time'"], verify_cube(ds)) + self.assertEqual(["missing time coordinate variable"], verify_cube(ds)) ds = ds.drop("lat") - self.assertEqual(["missing coordinate variable 'time'", - "missing coordinate variable 'lat'"], verify_cube(ds)) + self.assertEqual(["missing spatial x,y coordinate variables", + "missing time coordinate variable"], verify_cube(ds)) diff --git a/xcube/core/geom.py b/xcube/core/geom.py index 221221729..9abe0c407 100644 --- a/xcube/core/geom.py +++ b/xcube/core/geom.py @@ -30,8 +30,9 @@ import shapely.wkt import xarray as xr -from xcube.util.geojson import GeoJSON +from xcube.core.schema import get_dataset_xy_var_names, get_dataset_bounds_var_name from xcube.core.update import update_dataset_spatial_attrs +from xcube.util.geojson import GeoJSON GeometryLike = Union[shapely.geometry.base.BaseGeometry, Dict[str, Any], str, Sequence[Union[float, int]]] Bounds = Tuple[float, float, float, float] @@ -72,24 +73,28 @@ def mask_dataset_by_geometry(dataset: xr.Dataset, intersection with the bounding box of the geometry. 
""" geometry = convert_geometry(geometry) - - intersection_geometry = intersect_geometries(get_dataset_bounds(dataset), geometry) + xy_var_names = get_dataset_xy_var_names(dataset, must_exist=True) + intersection_geometry = intersect_geometries(get_dataset_bounds(dataset, xy_var_names=xy_var_names), + geometry) if intersection_geometry is None: return None if not no_clip: - dataset = _clip_dataset_by_geometry(dataset, intersection_geometry) + dataset = _clip_dataset_by_geometry(dataset, intersection_geometry, xy_var_names) + + ds_x_min, ds_y_min, ds_x_max, ds_y_max = get_dataset_bounds(dataset, xy_var_names=xy_var_names) - ds_x_min, ds_y_min, ds_x_max, ds_y_max = get_dataset_bounds(dataset) + x_var_name, y_var_name = xy_var_names + x_var, y_var = dataset[x_var_name], dataset[y_var_name] - width = dataset.dims['lon'] - height = dataset.dims['lat'] + width = x_var.size + height = y_var.size spatial_res = (ds_x_max - ds_x_min) / width mask_data = get_geometry_mask(width, height, intersection_geometry, ds_x_min, ds_y_min, spatial_res) mask = xr.DataArray(mask_data, coords=dict(lat=dataset.lat, lon=dataset.lon), - dims=('lat', 'lon')) + dims=(y_var.dims[0], x_var.dims[0])) dataset_vars = {} for var_name, var in dataset.data_vars.items(): @@ -120,34 +125,40 @@ def clip_dataset_by_geometry(dataset: xr.Dataset, :return: The dataset spatial subset, or None if the bounding box of the dataset has a no or a zero area intersection with the bounding box of the geometry. 
""" - intersection_geometry = intersect_geometries(get_dataset_bounds(dataset), geometry) + xy_var_names = get_dataset_xy_var_names(dataset, must_exist=True) + intersection_geometry = intersect_geometries(get_dataset_bounds(dataset, xy_var_names=xy_var_names), geometry) if intersection_geometry is None: return None - return _clip_dataset_by_geometry(dataset, intersection_geometry, save_geometry_wkt=save_geometry_wkt) + return _clip_dataset_by_geometry(dataset, intersection_geometry, xy_var_names, save_geometry_wkt=save_geometry_wkt) def _clip_dataset_by_geometry(dataset: xr.Dataset, intersection_geometry: shapely.geometry.base.BaseGeometry, + xy_var_names: Tuple[str, str], save_geometry_wkt: bool = False) -> Optional[xr.Dataset]: # TODO (forman): the following code is wrong, if the dataset bounds cross the anti-meridian! - ds_x_min, ds_y_min, ds_x_max, ds_y_max = get_dataset_bounds(dataset) + ds_x_min, ds_y_min, ds_x_max, ds_y_max = get_dataset_bounds(dataset, xy_var_names=xy_var_names) - width = dataset.lon.size - height = dataset.lat.size + x_var_name, y_var_name = xy_var_names + x_var = dataset[x_var_name] + y_var = dataset[y_var_name] + + width = x_var.size + height = y_var.size res = (ds_y_max - ds_y_min) / height - g_lon_min, g_lat_min, g_lon_max, g_lat_max = intersection_geometry.bounds - x1 = _clamp(int(math.floor((g_lon_min - ds_x_min) / res)), 0, width - 1) - x2 = _clamp(int(math.ceil((g_lon_max - ds_x_min) / res)), 0, width - 1) - y1 = _clamp(int(math.floor((g_lat_min - ds_y_min) / res)), 0, height - 1) - y2 = _clamp(int(math.ceil((g_lat_max - ds_y_min) / res)), 0, height - 1) - if not is_dataset_y_axis_inverted(dataset): + g_x_min, g_y_min, g_x_max, g_y_max = intersection_geometry.bounds + x1 = _clamp(int(math.floor((g_x_min - ds_x_min) / res)), 0, width - 1) + x2 = _clamp(int(math.ceil((g_x_max - ds_x_min) / res)), 0, width - 1) + y1 = _clamp(int(math.floor((g_y_min - ds_y_min) / res)), 0, height - 1) + y2 = _clamp(int(math.ceil((g_y_max - ds_y_min) / 
res)), 0, height - 1) + if not is_dataset_y_axis_inverted(dataset, xy_var_names=xy_var_names): _y1, _y2 = y1, y2 y1 = height - _y2 - 1 y2 = height - _y1 - 1 - dataset_subset = dataset.isel(lon=slice(x1, x2), lat=slice(y1, y2)) + dataset_subset = dataset.isel(**{x_var_name: slice(x1, x2), y_var_name: slice(y1, y2)}) update_dataset_spatial_attrs(dataset_subset, update_existing=True, in_place=True) @@ -170,11 +181,11 @@ def _save_geometry_wkt(dataset, intersection_geometry, save_geometry): def get_geometry_mask(width: int, height: int, geometry: GeometryLike, - lon_min: float, lat_min: float, res: float) -> np.ndarray: + x_min: float, y_min: float, res: float) -> np.ndarray: geometry = convert_geometry(geometry) # noinspection PyTypeChecker - transform = affine.Affine(res, 0.0, lon_min, - 0.0, -res, lat_min + res * height) + transform = affine.Affine(res, 0.0, x_min, + 0.0, -res, y_min + res * height) return rasterio.features.geometry_mask([geometry], out_shape=(height, width), transform=transform, @@ -267,55 +278,71 @@ def convert_geometry(geometry: Optional[GeometryLike]) -> Optional[shapely.geome raise ValueError(_INVALID_GEOMETRY_MSG) -def is_dataset_y_axis_inverted(dataset: Union[xr.Dataset, xr.DataArray]) -> bool: - if 'lat' in dataset.coords: - y = dataset.lat - elif 'y' in dataset.coords: - y = dataset.y +def is_dataset_y_axis_inverted(dataset: Union[xr.Dataset, xr.DataArray], + xy_var_names: Tuple[str, str] = None) -> bool: + if xy_var_names is None: + xy_var_names = get_dataset_xy_var_names(dataset, must_exist=True) + y_var = dataset[xy_var_names[1]] + return float(y_var[0]) < float(y_var[-1]) + + +def is_lon_lat_dataset(dataset: Union[xr.Dataset, xr.DataArray], + xy_var_names: Tuple[str, str] = None) -> bool: + if xy_var_names is None: + xy_var_names = get_dataset_xy_var_names(dataset, must_exist=True) + x_var_name, y_var_name = xy_var_names + if x_var_name == 'lon' and y_var_name == 'lat': + return True + x_var = dataset[x_var_name] + y_var = 
dataset[y_var_name] + return x_var.attrs.get('long_name') == 'longitude' and y_var.attrs.get('long_name') == 'latitude' + + +def get_dataset_geometry(dataset: Union[xr.Dataset, xr.DataArray], + xy_var_names: Tuple[str, str] = None) -> shapely.geometry.base.BaseGeometry: + if xy_var_names is None: + xy_var_names = get_dataset_xy_var_names(dataset, must_exist=True) + geo_bounds = get_dataset_bounds(dataset, xy_var_names=xy_var_names) + if is_lon_lat_dataset(dataset, xy_var_names=xy_var_names): + return get_box_split_bounds_geometry(*geo_bounds) else: - raise ValueError("Neither 'lat' nor 'y' coordinate variable found.") - return float(y[0]) < float(y[-1]) - - -def get_dataset_geometry(dataset: Union[xr.Dataset, xr.DataArray]) -> shapely.geometry.base.BaseGeometry: - return get_box_split_bounds_geometry(*get_dataset_bounds(dataset)) + return shapely.geometry.box(*geo_bounds) -def get_dataset_bounds(dataset: Union[xr.Dataset, xr.DataArray]) -> Bounds: - lon_var = dataset.coords.get("lon") - lat_var = dataset.coords.get("lat") - if lon_var is None: - raise ValueError('Missing coordinate variable "lon"') - if lat_var is None: - raise ValueError('Missing coordinate variable "lat"') +def get_dataset_bounds(dataset: Union[xr.Dataset, xr.DataArray], + xy_var_names: Tuple[str, str] = None) -> Bounds: + if xy_var_names is None: + xy_var_names = get_dataset_xy_var_names(dataset, must_exist=True) + x_name, y_name = xy_var_names + x_var, y_var = dataset.coords[x_name], dataset.coords[y_name] - lon_bnds_name = lon_var.attrs["bounds"] if "bounds" in lon_var.attrs else "lon_bnds" - if lon_bnds_name in dataset.coords: - lon_bnds_var = dataset.coords[lon_bnds_name] - lon_min = lon_bnds_var[0][0] - lon_max = lon_bnds_var[-1][1] + x_bnds_name = get_dataset_bounds_var_name(dataset, x_name) + if x_bnds_name: + x_bnds_var = dataset.coords[x_bnds_name] + x_min = x_bnds_var[0][0] + x_max = x_bnds_var[-1][1] else: - lon_min = lon_var[0] - lon_max = lon_var[-1] - delta = 
min(abs(np.diff(lon_var))) - lon_min -= 0.5 * delta - lon_max += 0.5 * delta - - lat_bnds_name = lat_var.attrs["bounds"] if "bounds" in lat_var.attrs else "lat_bnds" - if lat_bnds_name in dataset.coords: - lat_bnds_var = dataset.coords[lat_bnds_name] - lat1 = lat_bnds_var[0][0] - lat2 = lat_bnds_var[-1][1] - lat_min = min(lat1, lat2) - lat_max = max(lat1, lat2) + x_min = x_var[0] + x_max = x_var[-1] + delta = min(abs(np.diff(x_var))) + x_min -= 0.5 * delta + x_max += 0.5 * delta + + y_bnds_name = get_dataset_bounds_var_name(dataset, y_name) + if y_bnds_name: + y_bnds_var = dataset.coords[y_bnds_name] + y1 = y_bnds_var[0][0] + y2 = y_bnds_var[-1][1] + y_min = min(y1, y2) + y_max = max(y1, y2) else: - lat1 = lat_var[0] - lat2 = lat_var[-1] - delta = min(abs(np.diff(lat_var))) - lat_min = min(lat1, lat2) - 0.5 * delta - lat_max = max(lat1, lat2) + 0.5 * delta + y1 = y_var[0] + y2 = y_var[-1] + delta = min(abs(np.diff(y_var))) + y_min = min(y1, y2) - 0.5 * delta + y_max = max(y1, y2) + 0.5 * delta - return float(lon_min), float(lat_min), float(lon_max), float(lat_max) + return float(x_min), float(y_min), float(x_max), float(y_max) def get_box_split_bounds(lon_min: float, lat_min: float, diff --git a/xcube/core/new.py b/xcube/core/new.py index 8e58db82f..702e3f698 100644 --- a/xcube/core/new.py +++ b/xcube/core/new.py @@ -59,12 +59,14 @@ def my_func(time: int, lat: int, lon: int) -> Union[bool, int, float] lon_data = np.linspace(lon_start + spatial_res_05, lon_end - spatial_res_05, width) lon = xr.DataArray(lon_data, dims="lon") + lon.attrs["long_name"] = "longitude" lon.attrs["units"] = "degrees_east" lat_data = np.linspace(lat_start + 0.5 * spatial_res, lat_end - 0.5 * spatial_res, height) lat = xr.DataArray(lat_data, dims="lat") if inverse_lat: lat = lat[::-1] + lat.attrs["long_name"] = "latitude" lat.attrs["units"] = "degrees_north" time_data_2 = pd.date_range(start=time_start, periods=time_periods + 1, freq=time_freq).values diff --git a/xcube/core/schema.py 
b/xcube/core/schema.py index d95a2c071..9598d3fc7 100644 --- a/xcube/core/schema.py +++ b/xcube/core/schema.py @@ -19,7 +19,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from typing import Tuple, Sequence, Dict, Optional, Mapping +from typing import Tuple, Sequence, Dict, Optional, Mapping, Union import numpy as np import xarray as xr @@ -39,6 +39,9 @@ class CubeSchema: def __init__(self, shape: Sequence[int], coords: Mapping[str, np.array], + x_name: str = 'lon', + y_name: str = 'lat', + time_name: str = 'time', dims: Sequence[str] = None, chunks: Sequence[int] = None): @@ -46,17 +49,29 @@ def __init__(self, raise ValueError('shape must be a sequence of integer sizes') if not coords: raise ValueError('coords must be a mapping from dimension names to label arrays') + if not x_name: + raise ValueError('x_name must be given') + if not y_name: + raise ValueError('y_name must be given') + if not time_name: + raise ValueError('time_name must be given') ndim = len(shape) if ndim < 3: raise ValueError('shape must have at least three dimensions') - dims = tuple(dims) or ('time', 'lat', 'lon') + dims = tuple(dims) or (time_name, y_name, x_name) if dims and len(dims) != ndim: raise ValueError('dims must have same length as shape') - if dims[0] != 'time': - raise ValueError("the first name in dims must be 'time'") - if dims[-2:] not in (('lat', 'lon'), ('y', 'x')): - raise ValueError("the last two names in dims must be either ('lat', 'lon') or ('y', 'x')") + if x_name not in coords or y_name not in coords or time_name not in coords: + raise ValueError(f'missing variables {x_name!r}, {y_name!r}, {time_name!r} in coords') + x_var, y_var, time_var = coords.get(x_name), coords.get(y_name), coords.get(time_name) + if x_var.ndim != 1 or y_var.ndim != 1 or time_var.ndim != 1: + raise ValueError(f'variables {x_name!r}, {y_name!r}, {time_name!r} in coords must be 1-D') + x_dim, y_dim, time_dim = x_var.dims[0], y_var.dims[0], 
time_var.dims[0] + if dims[0] != time_dim: + raise ValueError(f"the first dimension in dims must be {time_dim!r}") + if dims[-2:] != (y_dim, x_dim): + raise ValueError(f"the last two dimensions in dims must be {y_dim!r} and {x_dim!r}") if chunks and len(chunks) != ndim: raise ValueError('chunks must have same length as shape') for i in range(ndim): @@ -71,6 +86,9 @@ def __init__(self, raise ValueError(f'number of labels of {dim_name!r} in coords does not match shape') self._shape = tuple(shape) + self._x_name = x_name + self._y_name = y_name + self._time_name = time_name self._dims = dims self._chunks = tuple(chunks) if chunks else None self._coords = dict(coords) @@ -86,14 +104,49 @@ def dims(self) -> Tuple[str, ...]: return self._dims @property - def time_dim(self) -> str: - """Name of time dimension.""" - return self._dims[0] + def x_name(self) -> str: + """Name of the spatial x coordinate variable.""" + return self._x_name @property - def spatial_dims(self) -> Tuple[str, str]: - """Tuple (pair) of spatial dimension names, will be either ('lon', 'lat') or ('x', 'y').""" - return self._dims[-1], self._dims[-2] + def y_name(self) -> str: + """Name of the spatial y coordinate variable.""" + return self._y_name + + @property + def time_name(self) -> str: + """Name of the time coordinate variable.""" + return self._time_name + + @property + def x_var(self) -> xr.DataArray: + """Spatial x coordinate variable.""" + return self._coords[self._x_name] + + @property + def y_var(self) -> xr.DataArray: + """Spatial y coordinate variable.""" + return self._coords[self._y_name] + + @property + def time_var(self) -> xr.DataArray: + """Time coordinate variable.""" + return self._coords[self._time_name] + + @property + def x_dim(self) -> str: + """Name of the spatial x dimension.""" + return self._dims[-1] + + @property + def y_dim(self) -> str: + """Name of the spatial y dimension.""" + return self._dims[-2] + + @property + def time_dim(self) -> str: + """Name of the time 
dimension.""" + return self._dims[0] @property def shape(self) -> Tuple[int, ...]: @@ -134,6 +187,10 @@ def get_cube_schema(cube: xr.Dataset) -> CubeSchema: :param cube: The data cube. :return: The cube schema. """ + + xy_var_names = get_dataset_xy_var_names(cube, must_exist=True, dataset_arg_name='cube') + time_var_name = get_dataset_time_var_name(cube, must_exist=True, dataset_arg_name='cube') + first_dims = None first_shape = None first_chunks = None @@ -180,4 +237,86 @@ def get_cube_schema(cube: xr.Dataset) -> CubeSchema: if first_dims is None: raise ValueError('cube is empty') - return CubeSchema(first_shape, first_coords, dims=tuple(str(d) for d in first_dims), chunks=first_chunks) + return CubeSchema(first_shape, + first_coords, + x_name=xy_var_names[0], + y_name=xy_var_names[1], + time_name=time_var_name, + dims=tuple(str(d) for d in first_dims), + chunks=first_chunks) + + +def get_dataset_xy_var_names(dataset: Union[xr.Dataset, xr.DataArray], + must_exist: bool = False, + dataset_arg_name: str = 'dataset') -> Optional[Tuple[str, str]]: + x_var_name = None + y_var_name = None + for var_name, var in dataset.coords.items(): + if var.attrs.get('standard_name') == 'projection_x_coordinate' \ + or var.attrs.get('long_name') == 'x coordinate of projection': + if var.ndim == 1: + x_var_name = var_name + if var.attrs.get('standard_name') == 'projection_y_coordinate' \ + or var.attrs.get('long_name') == 'y coordinate of projection': + if var.ndim == 1: + y_var_name = var_name + if x_var_name and y_var_name: + return str(x_var_name), str(y_var_name) + + x_var_name = None + y_var_name = None + for var_name, var in dataset.coords.items(): + if var.attrs.get('long_name') == 'longitude': + if var.ndim == 1: + x_var_name = var_name + if var.attrs.get('long_name') == 'latitude': + if var.ndim == 1: + y_var_name = var_name + if x_var_name and y_var_name: + return str(x_var_name), str(y_var_name) + + for x_var_name, y_var_name in (('lon', 'lat'), ('x', 'y')): + if x_var_name 
in dataset.coords and y_var_name in dataset.coords: + x_var = dataset.coords[x_var_name] + y_var = dataset.coords[y_var_name] + if x_var.ndim == 1 and y_var.ndim == 1: + return x_var_name, y_var_name + + if must_exist: + raise ValueError(f'{dataset_arg_name} has no valid spatial coordinate variables') + + return None + + +def get_dataset_time_var_name(dataset: Union[xr.Dataset, xr.DataArray], + must_exist: bool = False, + dataset_arg_name: str = 'dataset') -> Optional[str]: + time_var_name = 'time' + if time_var_name in dataset.coords: + time_var = dataset.coords[time_var_name] + if time_var.ndim == 1 and np.issubdtype(time_var.dtype, np.datetime64): + return time_var_name + + if must_exist: + raise ValueError(f'{dataset_arg_name} has no valid time coordinate variable') + + return None + + +def get_dataset_bounds_var_name(dataset: Union[xr.Dataset, xr.DataArray], + var_name: str, + must_exist: bool = False, + dataset_arg_name: str = 'dataset') -> Optional[str]: + if var_name in dataset.coords: + var = dataset[var_name] + bounds_var_name = var.attrs.get('bounds', f'{var_name}_bnds') + if bounds_var_name in dataset: + bounds_var = dataset[bounds_var_name] + if bounds_var.ndim == 2 \ + and bounds_var.shape[0] == var.shape[0] and bounds_var.shape[1] == 2: + return bounds_var_name + + if must_exist: + raise ValueError(f'{dataset_arg_name} has no valid bounds variable for variable {var_name!r}') + + return None diff --git a/xcube/core/verify.py b/xcube/core/verify.py index 0a6e6312a..a1a943f21 100644 --- a/xcube/core/verify.py +++ b/xcube/core/verify.py @@ -24,6 +24,9 @@ import numpy as np import xarray as xr +from xcube.core.geom import is_lon_lat_dataset +from xcube.core.schema import get_dataset_xy_var_names, get_dataset_time_var_name + def assert_cube(dataset: xr.Dataset, name=None) -> xr.Dataset: """ @@ -48,13 +51,11 @@ def verify_cube(dataset: xr.Dataset) -> List[str]: Verify the given *dataset* for being a valid xcube dataset. 
The tool verifies that *dataset* - * defines the dimensions "time", "lat", "lon"; - * has corresponding "time", "lat", "lon" coordinate variables and that they - are valid, e.g. 1-D, non-empty, using correct units; - * has valid bounds variables for "time", "lat", "lon" coordinate - variables, if any; + * defines two spatial x,y coordinate variables, that are 1D, non-empty, using correct units; + * defines a time coordinate variables, that are 1D, non-empty, using correct units; + * has valid bounds variables for spatial x,y and time coordinate variables, if any; * has any data variables and that they are valid, e.g. min. 3-D, all have - same dimensions, have at least dimensions "time", "lat", "lon". + same dimensions, have at least the dimensions dim(time), dim(y), dim(x) in that order. Returns a list of issues, which is empty if *dataset* is a valid xcube dataset. @@ -62,18 +63,32 @@ def verify_cube(dataset: xr.Dataset) -> List[str]: :return: List of issues or empty list. """ report = [] - _check_dim(dataset, "time", report) - _check_dim(dataset, "lat", report) - _check_dim(dataset, "lon", report) - _check_time(dataset, "time", report) - _check_lon_or_lat(dataset, "lat", -90, 90, report) - _check_lon_or_lat(dataset, "lon", -180, 180, report) - # TODO (forman): verify bounds coordinate variables - _check_data_variables(dataset, report) + + xy_var_names = get_dataset_xy_var_names(dataset, must_exist=False) + if xy_var_names is None: + report.append(f"missing spatial x,y coordinate variables") + + time_var_name = get_dataset_time_var_name(dataset, must_exist=False) + if time_var_name is None: + report.append(f"missing time coordinate variable") + + if time_var_name: + _check_time(dataset, time_var_name, report) + if xy_var_names and is_lon_lat_dataset(dataset, xy_var_names=xy_var_names): + _check_lon_or_lat(dataset, xy_var_names[0], -180., 180., report) + _check_lon_or_lat(dataset, xy_var_names[1], -90., 90., report) + + if xy_var_names and time_var_name: + 
_check_data_variables(dataset, xy_var_names, time_var_name, report) + return report -def _check_data_variables(dataset, report): +def _check_data_variables(dataset, xy_var_names, time_var_name, report): + x_var_name, y_var_name = xy_var_names + x_var, y_var, time_var = dataset[x_var_name], dataset[y_var_name], dataset[time_var_name] + x_dim, y_dim, time_dim = x_var.dims[0], y_var.dims[0], time_var.dims[0] + first_var = None first_dims = None first_chunks = None @@ -81,9 +96,9 @@ def _check_data_variables(dataset, report): dims = var.dims chunks = var.data.chunks if hasattr(var.data, "chunks") else None - if len(dims) < 3 or dims[0] != "time" or dims[-2] != "lat" or dims[-1] != "lon": + if len(dims) < 3 or dims[0] != time_dim or dims[-2] != y_dim or dims[-1] != x_dim: report.append(f"dimensions of data variable {var_name!r}" - f" must be ('time', ..., 'lat', 'lon')," + f" must be ({time_dim!r}, ..., {y_dim!r}, {x_dim!r})," f" but were {dims!r} for {var_name!r}") if first_var is None: @@ -110,28 +125,28 @@ def _check_dim(dataset, name, report): report.append(f"size of dimension {name!r} must be a positive integer") -def _check_coord_var(dataset, name, report): - if name not in dataset.coords: - report.append(f"missing coordinate variable {name!r}") +def _check_coord_var(dataset, var_name, report): + if var_name not in dataset.coords: + report.append(f"missing coordinate variable {var_name!r}") return None - var = dataset.coords[name] - if var.dims != (name,): - report.append(f"coordinate variable {name!r} must have a single dimension {name!r}") + var = dataset.coords[var_name] + if var.dims != (var_name,): + report.append(f"coordinate variable {var_name!r} must have a single dimension {var_name!r}") return None if var.size == 0: - report.append(f"coordinate variable {name!r} must not be empty") + report.append(f"coordinate variable {var_name!r} must not be empty") return None - bnds_name = var.attrs.get('bounds', f'{name}_bnds') + bnds_name = var.attrs.get('bounds', 
f'{var_name}_bnds') if bnds_name in dataset.coords: bnds_var = dataset.coords[bnds_name] expected_shape = var.size, 2 expected_dtype = var.dtype - if len(bnds_var.dims) != 2 or bnds_var.dims[0] != name: + if len(bnds_var.dims) != 2 or bnds_var.dims[0] != var_name: report.append(f"bounds coordinate variable {bnds_name!r}" - f" must have dimensions ({name!r}, )") + f" must have dimensions ({var_name!r}, )") if bnds_var.shape != expected_shape: report.append( f"shape of bounds coordinate variable {bnds_name!r}" @@ -145,23 +160,18 @@ def _check_coord_var(dataset, name, report): return var -def _check_lon_or_lat(dataset, name, min_value, max_value, report): - var = _check_coord_var(dataset, name, report) +def _check_lon_or_lat(dataset, var_name, min_value, max_value, report): + var = _check_coord_var(dataset, var_name, report) if var is None: return if not np.all(np.isfinite(var)): - report.append(f"values of coordinate variable {name!r} must be finite") + report.append(f"values of coordinate variable {var_name!r} must be finite") if np.min(var) < min_value or np.max(var) > max_value: - report.append(f"values of coordinate variable {name!r}" + report.append(f"values of coordinate variable {var_name!r}" f" must be in the range {min_value} to {max_value}") - # TODO (forman): the following check is not valid for "lat" because we currently use 'wrong' lat-order - # TODO (forman): the following check is not valid for "lon" if a cube covers the antimeridian - # if not np.all(np.diff(var.astype(np.float64)) > 0): - # report.append(f"values of coordinate variable {name!r} must be monotonic increasing") - def _check_time(dataset, name, report): var = _check_coord_var(dataset, name, report) From 4311ef410b309b5cc3167f902f05cf4e832613fd Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Sun, 10 Nov 2019 20:59:21 +0100 Subject: [PATCH 2/5] new API function rasterize_features_into_dataset() --- test/core/test_geom.py | 61 ++++++++++++++++++++++++++++++++++++++++-- 
xcube/core/geom.py | 56 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 113 insertions(+), 4 deletions(-) diff --git a/test/core/test_geom.py b/test/core/test_geom.py index cfec8de2d..341b38918 100644 --- a/test/core/test_geom.py +++ b/test/core/test_geom.py @@ -4,10 +4,67 @@ import shapely.geometry import xarray as xr -from xcube.core.new import new_cube from xcube.core.chunk import chunk_dataset from xcube.core.geom import get_dataset_geometry, get_dataset_bounds, get_geometry_mask, convert_geometry, \ - mask_dataset_by_geometry, clip_dataset_by_geometry + mask_dataset_by_geometry, clip_dataset_by_geometry, rasterize_features_into_dataset +from xcube.core.new import new_cube + + +class RasterizeFeaturesIntoDataset(unittest.TestCase): + def test_rasterize_features_into_dataset(self): + dataset = new_cube(width=10, height=10, spatial_res=10, lon_start=-50, lat_start=-50) + feature1 = dict(type='Feature', + geometry=dict(type='Polygon', + coordinates=[[(-180, 0), (-1, 0), (-1, 90), (-180, 90), (-180, 0)]]), + properties=dict(a=0.5, b=2.1, c=4.9)) + feature2 = dict(type='Feature', + geometry=dict(type='Polygon', + coordinates=[[(-180, -90), (-1, -90), (-1, 0), (-180, 0), (-180, -90)]]), + properties=dict(a=0.6, b=2.2, c=4.8)) + feature3 = dict(type='Feature', + geometry=dict(type='Polygon', + coordinates=[[(20, 0), (180, 0), (180, 90), (20, 90), (20, 0)]]), + properties=dict(a=0.7, b=2.3, c=4.7)) + feature4 = dict(type='Feature', + geometry=dict(type='Polygon', + coordinates=[[(20, -90), (180, -90), (180, 0), (20, 0), (20, -90)]]), + properties=dict(a=0.8, b=2.4, c=4.6)) + dataset = rasterize_features_into_dataset(dataset, + [feature1, feature2, feature3, feature4], + ['a', 'b', 'c'], + in_place=False) + self.assertIsNotNone(dataset) + self.assertIn('lon', dataset.coords) + self.assertIn('lat', dataset.coords) + self.assertIn('time', dataset.coords) + self.assertIn('a', dataset) + self.assertIn('b', dataset) + self.assertIn('c', dataset) + 
self.assertEquals((10, 10), dataset.a.shape) + self.assertEquals((10, 10), dataset.b.shape) + self.assertEquals((10, 10), dataset.c.shape) + self.assertEquals(('lat', 'lon'), dataset.a.dims) + self.assertEquals(('lat', 'lon'), dataset.b.dims) + self.assertEquals(('lat', 'lon'), dataset.c.dims) + self.assertIn(0.5, dataset.a.min()) + self.assertIn(0.8, dataset.a.max()) + self.assertIn(2.1, dataset.b.min()) + self.assertIn(2.4, dataset.b.max()) + self.assertIn(4.6, dataset.c.min()) + self.assertIn(4.9, dataset.c.max()) + nan = np.nan + np.testing.assert_almost_equal( + np.array([[0.5, 0.5, 0.5, 0.5, 0.5, nan, nan, 0.7, 0.7, 0.7], + [0.5, 0.5, 0.5, 0.5, 0.5, nan, nan, 0.7, 0.7, 0.7], + [0.5, 0.5, 0.5, 0.5, 0.5, nan, nan, 0.7, 0.7, 0.7], + [0.5, 0.5, 0.5, 0.5, 0.5, nan, nan, 0.7, 0.7, 0.7], + [0.5, 0.5, 0.5, 0.5, 0.5, nan, nan, 0.7, 0.7, 0.7], + [0.6, 0.6, 0.6, 0.6, 0.6, nan, nan, 0.8, 0.8, 0.8], + [0.6, 0.6, 0.6, 0.6, 0.6, nan, nan, 0.8, 0.8, 0.8], + [0.6, 0.6, 0.6, 0.6, 0.6, nan, nan, 0.8, 0.8, 0.8], + [0.6, 0.6, 0.6, 0.6, 0.6, nan, nan, 0.8, 0.8, 0.8], + [0.6, 0.6, 0.6, 0.6, 0.6, nan, nan, 0.8, 0.8, 0.8]]), + dataset.a.values) class DatasetGeometryTest(unittest.TestCase): diff --git a/xcube/core/geom.py b/xcube/core/geom.py index 9abe0c407..6bf43f4c9 100644 --- a/xcube/core/geom.py +++ b/xcube/core/geom.py @@ -20,7 +20,7 @@ # SOFTWARE. 
import math -from typing import Optional, Union, Dict, Tuple, Sequence, Any +from typing import Optional, Union, Dict, Tuple, Sequence, Any, Mapping import affine import numpy as np @@ -46,6 +46,58 @@ _INVALID_BOX_COORDS_MSG = 'Invalid box coordinates' +def rasterize_features_into_dataset(dataset: xr.Dataset, + features: Sequence[Mapping[str, Any]], + feature_property_names: Sequence[str], + feature_property_name_mapping: Dict[str, str] = None, + in_place: bool = False) -> Optional[xr.Dataset]: + feature_property_name_mapping = feature_property_name_mapping or {} + xy_var_names = get_dataset_xy_var_names(dataset, must_exist=True) + dataset_bounds = get_dataset_bounds(dataset, xy_var_names=xy_var_names) + + ds_x_min, ds_y_min, ds_x_max, ds_y_max = dataset_bounds + + x_var_name, y_var_name = xy_var_names + x_var, y_var = dataset[x_var_name], dataset[y_var_name] + x_dim, y_dim = x_var.dims[0], y_var.dims[0] + + width = x_var.size + height = y_var.size + spatial_res = (ds_x_max - ds_x_min) / width + + if not in_place: + dataset = xr.Dataset(coords=dataset.coords, attrs=dataset.attrs) + + for feature in features: + geometry = convert_geometry(feature['geometry']) + intersection_geometry = intersect_geometries(dataset_bounds, geometry) + if intersection_geometry is None: + continue + + mask_data = get_geometry_mask(width, height, intersection_geometry, ds_x_min, ds_y_min, spatial_res) + mask = xr.DataArray(mask_data, + coords={y_var_name: y_var, x_var_name: x_var}, + dims=(y_dim, x_dim)) + + for feature_property_name in feature_property_names: + var_name = feature_property_name_mapping.get(feature_property_name, feature_property_name) + feature_property_value = float(feature['properties'].get(feature_property_name, float('nan'))) + feature_property_var = xr.DataArray(np.full((height, width), feature_property_value, dtype=np.float64), + coords={y_var_name: y_var, x_var_name: x_var}, + dims=(y_dim, x_dim)) + if feature_property_name not in dataset: + 
feature_property_var_old = xr.DataArray(np.full((height, width), np.nan, dtype=np.float64), + coords={y_var_name: y_var, x_var_name: x_var}, + dims=(y_dim, x_dim)) + dataset[var_name] = feature_property_var_old + else: + feature_property_var_old = dataset[var_name] + + dataset[var_name] = feature_property_var.where(mask, feature_property_var_old) + + return dataset + + def mask_dataset_by_geometry(dataset: xr.Dataset, geometry: GeometryLike, excluded_vars: Sequence[str] = None, @@ -93,7 +145,7 @@ def mask_dataset_by_geometry(dataset: xr.Dataset, mask_data = get_geometry_mask(width, height, intersection_geometry, ds_x_min, ds_y_min, spatial_res) mask = xr.DataArray(mask_data, - coords=dict(lat=dataset.lat, lon=dataset.lon), + coords={y_var_name: y_var, x_var_name: x_var}, dims=(y_var.dims[0], x_var.dims[0])) dataset_vars = {} From c4ae482d93cb89ed4ea2f7e62e6ba27980855a98 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Mon, 11 Nov 2019 11:14:02 +0100 Subject: [PATCH 3/5] module xcube.core.new no longer depend on 'lon' and 'lat' coords addressing #222 with new function rasterize_features_into_dataset() --- CHANGES.md | 2 + test/cli/test_cli.py | 4 +- test/core/test_extract.py | 10 +- test/core/test_geom.py | 65 +++++++---- xcube/core/geom.py | 80 ++++++++++---- xcube/core/new.py | 222 +++++++++++++++++++++++--------------- xcube/core/xarray.py | 41 +------ 7 files changed, 248 insertions(+), 176 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 801aa48ca..123b004a2 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -17,6 +17,8 @@ function that can be used to generate an output cube computed from a Python function that is applied to one or more input cubes. Replaces the formerly hidden `xcube apply` command. 
(#167) + +* Added new function `xcube.core.geom.` to rasterize features (#222) ### Enhancements diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py index 8ac4d11ef..5f38263a5 100644 --- a/test/cli/test_cli.py +++ b/test/cli/test_cli.py @@ -27,8 +27,8 @@ def test_dump_ds(self): "Attributes:\n" " Conventions: CF-1.7\n" " title: Test Cube\n" - " time_coverage_start: 2010-01-01 00:00:00\n" - " time_coverage_end: 2010-01-06 00:00:00\n" + " time_coverage_start: 2010-01-01T00:00:00\n" + " time_coverage_end: 2010-01-06T00:00:00\n" " geospatial_lon_min: -180.0\n" " geospatial_lon_max: 180.0\n" " geospatial_lon_units: degrees_east\n" diff --git a/test/core/test_extract.py b/test/core/test_extract.py index 0b49ce6f6..8a8b6d22b 100644 --- a/test/core/test_extract.py +++ b/test/core/test_extract.py @@ -13,9 +13,9 @@ class GetCubeValuesForPointsTest(unittest.TestCase): def _new_test_cube(self): return new_cube(width=2000, height=1000, - lon_start=0.0, - lat_start=50.0, - spatial_res=4.0 / 2000, + x_start=0.0, + y_start=50.0, + x_res=4.0 / 2000, time_start="2010-01-01", time_periods=20, variables=dict(precipitation=0.6, temperature=276.2)) @@ -107,11 +107,11 @@ def test_get_dataset_indexes_without_bounds(self): self._assert_get_dataset_indexes_works(dataset) def test_get_dataset_indexes_with_bounds_inverse_lat(self): - dataset = new_cube(width=360, height=180, inverse_lat=True, drop_bounds=False) + dataset = new_cube(width=360, height=180, inverse_y=True, drop_bounds=False) self._assert_get_dataset_indexes_works(dataset, inverse_lat=True) def test_get_dataset_indexes_without_bounds_inverse_lat(self): - dataset = new_cube(width=360, height=180, inverse_lat=True, drop_bounds=True) + dataset = new_cube(width=360, height=180, inverse_y=True, drop_bounds=True) self._assert_get_dataset_indexes_works(dataset, inverse_lat=True) def _assert_get_dataset_indexes_works(self, dataset, inverse_lat=False): diff --git a/test/core/test_geom.py b/test/core/test_geom.py index 
341b38918..0a8113a7b 100644 --- a/test/core/test_geom.py +++ b/test/core/test_geom.py @@ -5,53 +5,74 @@ import xarray as xr from xcube.core.chunk import chunk_dataset -from xcube.core.geom import get_dataset_geometry, get_dataset_bounds, get_geometry_mask, convert_geometry, \ - mask_dataset_by_geometry, clip_dataset_by_geometry, rasterize_features_into_dataset +from xcube.core.geom import clip_dataset_by_geometry +from xcube.core.geom import convert_geometry +from xcube.core.geom import get_dataset_bounds +from xcube.core.geom import get_dataset_geometry +from xcube.core.geom import get_geometry_mask +from xcube.core.geom import mask_dataset_by_geometry +from xcube.core.geom import rasterize_features_into_dataset from xcube.core.new import new_cube class RasterizeFeaturesIntoDataset(unittest.TestCase): - def test_rasterize_features_into_dataset(self): - dataset = new_cube(width=10, height=10, spatial_res=10, lon_start=-50, lat_start=-50) + def test_rasterize_features_into_dataset_lonlat(self): + self._test_rasterize_features_into_dataset('lon', 'lat') + + def test_rasterize_features_into_dataset_xy(self): + self._test_rasterize_features_into_dataset('x', 'y') + + def _test_rasterize_features_into_dataset(self, x_name, y_name): + dataset = new_cube(width=10, height=10, x_name=x_name, y_name=y_name, x_res=10, x_start=-50, y_start=-50) feature1 = dict(type='Feature', geometry=dict(type='Polygon', coordinates=[[(-180, 0), (-1, 0), (-1, 90), (-180, 90), (-180, 0)]]), - properties=dict(a=0.5, b=2.1, c=4.9)) + properties=dict(a=0.5, b=2.1, c=9)) feature2 = dict(type='Feature', geometry=dict(type='Polygon', coordinates=[[(-180, -90), (-1, -90), (-1, 0), (-180, 0), (-180, -90)]]), - properties=dict(a=0.6, b=2.2, c=4.8)) + properties=dict(a=0.6, b=2.2, c=8)) feature3 = dict(type='Feature', geometry=dict(type='Polygon', coordinates=[[(20, 0), (180, 0), (180, 90), (20, 90), (20, 0)]]), - properties=dict(a=0.7, b=2.3, c=4.7)) + properties=dict(a=0.7, b=2.3, c=7)) feature4 = 
dict(type='Feature', geometry=dict(type='Polygon', coordinates=[[(20, -90), (180, -90), (180, 0), (20, 0), (20, -90)]]), - properties=dict(a=0.8, b=2.4, c=4.6)) + properties=dict(a=0.8, b=2.4, c=6)) dataset = rasterize_features_into_dataset(dataset, [feature1, feature2, feature3, feature4], ['a', 'b', 'c'], + var_properties=dict( + b=('b', np.float32, np.nan, dict(units='meters')), + c=('c2', np.uint8, 0, None), + ), in_place=False) self.assertIsNotNone(dataset) - self.assertIn('lon', dataset.coords) - self.assertIn('lat', dataset.coords) + self.assertIn(x_name, dataset.coords) + self.assertIn(y_name, dataset.coords) self.assertIn('time', dataset.coords) self.assertIn('a', dataset) self.assertIn('b', dataset) - self.assertIn('c', dataset) + self.assertIn('c2', dataset) self.assertEquals((10, 10), dataset.a.shape) self.assertEquals((10, 10), dataset.b.shape) - self.assertEquals((10, 10), dataset.c.shape) - self.assertEquals(('lat', 'lon'), dataset.a.dims) - self.assertEquals(('lat', 'lon'), dataset.b.dims) - self.assertEquals(('lat', 'lon'), dataset.c.dims) - self.assertIn(0.5, dataset.a.min()) - self.assertIn(0.8, dataset.a.max()) - self.assertIn(2.1, dataset.b.min()) - self.assertIn(2.4, dataset.b.max()) - self.assertIn(4.6, dataset.c.min()) - self.assertIn(4.9, dataset.c.max()) + self.assertEquals((10, 10), dataset.c2.shape) + self.assertEquals(np.float64, dataset.a.dtype) + self.assertEquals(np.float32, dataset.b.dtype) + self.assertEquals(np.uint8, dataset.c2.dtype) + self.assertEquals({}, dataset.a.attrs) + self.assertEquals({'units': 'meters'}, dataset.b.attrs) + self.assertEquals({}, dataset.c2.attrs) + self.assertEquals((y_name, x_name), dataset.a.dims) + self.assertEquals((y_name, x_name), dataset.b.dims) + self.assertEquals((y_name, x_name), dataset.c2.dims) + self.assertEquals(np.array(0.5, dtype=np.float64), dataset.a.min()) + self.assertEquals(np.array(0.8, dtype=np.float64), dataset.a.max()) + self.assertEquals(np.array(2.1, dtype=np.float32), 
dataset.b.min()) + self.assertEquals(np.array(2.4, dtype=np.float32), dataset.b.max()) + self.assertEquals(np.array(0, dtype=np.uint8), dataset.c2.min()) + self.assertEquals(np.array(9, dtype=np.uint8), dataset.c2.max()) nan = np.nan np.testing.assert_almost_equal( np.array([[0.5, 0.5, 0.5, 0.5, 0.5, nan, nan, 0.7, 0.7, 0.7], @@ -85,7 +106,7 @@ def setUp(self) -> None: self.cube = new_cube(width=width, height=height, - spatial_res=spatial_res, + x_res=spatial_res, drop_bounds=True, variables=dict(temp=273.9, precip=0.9)) diff --git a/xcube/core/geom.py b/xcube/core/geom.py index 6bf43f4c9..a65e0da5a 100644 --- a/xcube/core/geom.py +++ b/xcube/core/geom.py @@ -45,13 +45,35 @@ _INVALID_BOX_COORDS_MSG = 'Invalid box coordinates' +Name = str +Attrs = Dict[Name, Any] +VarPropsTuple = Tuple[Name, Any, Any, Optional[Attrs]] + def rasterize_features_into_dataset(dataset: xr.Dataset, - features: Sequence[Mapping[str, Any]], - feature_property_names: Sequence[str], - feature_property_name_mapping: Dict[str, str] = None, + features: Sequence[Mapping[Name, Any]], + feature_property_names: Sequence[Name], + var_properties: Dict[Name, Union[Name, VarPropsTuple]] = None, in_place: bool = False) -> Optional[xr.Dataset]: - feature_property_name_mapping = feature_property_name_mapping or {} + """ + Rasterize numeric feature properties of GeoJSON *features* as new variables into *dataset*. + + Using *var_properties* the properties of newly created variables can be determined. + If provided, it must be a mapping from feature property name to a variable name + or a 4-tuple ``(name, dtype, fill_value, attributes)`` to be used for a new variable. + If only variable name is given, the defaults are + ``(name, dtype='float64', fill_value=float('nan'), attributes=None)``. + + :param dataset: The xarray dataset. + :param features: Sequence of GeoJSON features. + :param feature_property_names: Sequence of names of numeric feature properties to be rasterized. 
+ :param var_properties: Optional mapping of feature property name + to a name or a 4-tuple (name, dtype, fill_value, attributes) for the new variable. + :param in_place: Whether to add new variables to *dataset*. + If False, a copy will be created and returned. + :return: dataset with rasterized feature_property + """ + var_properties = var_properties or {} xy_var_names = get_dataset_xy_var_names(dataset, must_exist=True) dataset_bounds = get_dataset_bounds(dataset, xy_var_names=xy_var_names) @@ -60,6 +82,8 @@ def rasterize_features_into_dataset(dataset: xr.Dataset, x_var_name, y_var_name = xy_var_names x_var, y_var = dataset[x_var_name], dataset[y_var_name] x_dim, y_dim = x_var.dims[0], y_var.dims[0] + coords = {y_var_name: y_var, x_var_name: x_var} + dims = (y_dim, x_dim) width = x_var.size height = y_var.size @@ -69,31 +93,47 @@ def rasterize_features_into_dataset(dataset: xr.Dataset, dataset = xr.Dataset(coords=dataset.coords, attrs=dataset.attrs) for feature in features: - geometry = convert_geometry(feature['geometry']) + geometry = feature.get('geometry') + if not geometry: + continue + + geometry = convert_geometry(geometry) + if geometry.is_empty or not geometry.is_valid: + continue + + # TODO (forman): allow transforming geometry into CRS of dataset here + intersection_geometry = intersect_geometries(dataset_bounds, geometry) if intersection_geometry is None: continue mask_data = get_geometry_mask(width, height, intersection_geometry, ds_x_min, ds_y_min, spatial_res) - mask = xr.DataArray(mask_data, - coords={y_var_name: y_var, x_var_name: x_var}, - dims=(y_dim, x_dim)) + mask = xr.DataArray(mask_data, coords=coords, dims=dims) for feature_property_name in feature_property_names: - var_name = feature_property_name_mapping.get(feature_property_name, feature_property_name) - feature_property_value = float(feature['properties'].get(feature_property_name, float('nan'))) - feature_property_var = xr.DataArray(np.full((height, width), feature_property_value, 
dtype=np.float64), - coords={y_var_name: y_var, x_var_name: x_var}, - dims=(y_dim, x_dim)) - if feature_property_name not in dataset: - feature_property_var_old = xr.DataArray(np.full((height, width), np.nan, dtype=np.float64), - coords={y_var_name: y_var, x_var_name: x_var}, - dims=(y_dim, x_dim)) - dataset[var_name] = feature_property_var_old + feature_properties = feature.get('properties', {}) + if not feature_properties: + continue + + var_name = var_properties.get(feature_property_name, feature_property_name) + var_dtype = np.float64 + var_fill_value = np.nan + var_attrs = None + if not isinstance(var_name, str): + var_name, var_dtype, var_fill_value, var_attrs = var_name + + feature_property_value = float(feature_properties.get(feature_property_name, var_fill_value)) + var_new = xr.DataArray(np.full((height, width), feature_property_value, dtype=var_dtype), + coords=coords, dims=dims, attrs=var_attrs) + if var_name not in dataset: + var_old = xr.DataArray(np.full((height, width), var_fill_value, dtype=var_dtype), + coords=coords, dims=dims, attrs=var_attrs) + dataset[var_name] = var_old else: - feature_property_var_old = dataset[var_name] + var_old = dataset[var_name] - dataset[var_name] = feature_property_var.where(mask, feature_property_var_old) + dataset[var_name] = var_new.where(mask, var_old) + dataset[var_name].encoding.update(fill_value=var_fill_value) return dataset diff --git a/xcube/core/new.py b/xcube/core/new.py index 702e3f698..2601fa4f6 100644 --- a/xcube/core/new.py +++ b/xcube/core/new.py @@ -4,21 +4,28 @@ import pandas as pd import xarray as xr -_TIME_DTYPE = "datetime64[s]" -_TIME_UNITS = "seconds since 1970-01-01T00:00:00" -_TIME_CALENDAR = "proleptic_gregorian" - -def new_cube(title="Test Cube", +def new_cube(title='Test Cube', width=360, height=180, - spatial_res=1.0, - lon_start=-180.0, - lat_start=-90.0, + x_name='lon', + y_name='lat', + x_dtype='float64', + y_dtype=None, + x_units='degrees_east', + y_units='degrees_north', + x_res=1.0, 
+ y_res=None, + x_start=-180.0, + y_start=-90.0, + inverse_y=False, + time_name='time', + time_dtype='datetime64[s]', + time_units='seconds since 1970-01-01T00:00:00', + time_calendar='proleptic_gregorian', time_periods=5, time_freq="D", time_start='2010-01-01T00:00:00', - inverse_lat=False, drop_bounds=False, variables=None): """ @@ -30,100 +37,137 @@ def new_cube(title="Test Cube", array-like objects, or functions that compute their return value from passed coordinate indexes. The expected signature is::: - def my_func(time: int, lat: int, lon: int) -> Union[bool, int, float] - - :param title: A title. - :param width: Horizontal number of grid cells - :param height: Vertical number of grid cells - :param spatial_res: Spatial resolution in degrees - :param lon_start: Minimum longitude value - :param lat_start: Minimum latitude value - :param time_periods: Number of time steps - :param time_freq: Duration of each time step - :param time_start: First time value - :param inverse_lat: Whether to create an inverse latitude axis - :param drop_bounds: If True, coordinate bounds variables are not created. - :param variables: Dictionary of data variables to be added. + def my_func(time: int, y: int, x: int) -> Union[bool, int, float] + + :param title: A title. Defaults to 'Test Cube'. + :param width: Horizontal number of grid cells. Defaults to 360. + :param height: Vertical number of grid cells. Defaults to 180. + :param x_name: Name of the x coordinate variable. Defaults to 'lon'. + :param y_name: Name of the y coordinate variable. Defaults to 'lat'. + :param x_dtype: Data type of x coordinates. Defaults to 'float64'. + :param y_dtype: Data type of y coordinates. Defaults to 'float64'. + :param x_units: Units of the x coordinates. Defaults to 'degrees_east'. + :param y_units: Units of the y coordinates. Defaults to 'degrees_north'. + :param x_start: Minimum x value. Defaults to -180. + :param y_start: Minimum y value. Defaults to -90. 
+    :param x_res: Spatial resolution in x-direction. Defaults to 1.0. +    :param y_res: Spatial resolution in y-direction. Defaults to 1.0. +    :param inverse_y: Whether to create an inverse y axis. Defaults to False. +    :param time_name: Name of the time coordinate variable. Defaults to 'time'. +    :param time_periods: Number of time steps. Defaults to 5. +    :param time_freq: Duration of each time step. Defaults to 'D'. +    :param time_start: First time value. Defaults to '2010-01-01T00:00:00'. +    :param time_dtype: Numpy data type for time coordinates. Defaults to 'datetime64[s]'. +    :param time_units: Units for time coordinates. Defaults to 'seconds since 1970-01-01T00:00:00'. +    :param time_calendar: Calendar for time coordinates. Defaults to 'proleptic_gregorian'. +    :param drop_bounds: If True, coordinate bounds variables are not created. Defaults to False. +    :param variables: Dictionary of data variables to be added. None by default. :return: A cube instance """ -    lon_end = lon_start + width * spatial_res -    lat_end = lat_start + height * spatial_res -    if width < 0 or height < 0 or spatial_res <= 0.0: -        raise ValueError() -    if lon_start < -180. or lon_end > 180. or lat_start < -90. 
or lat_end > 90.: +    y_dtype = y_dtype if y_dtype is not None else x_dtype +    y_res = y_res if y_res is not None else x_res +    if width < 0 or height < 0 or x_res <= 0.0 or y_res <= 0.0: raise ValueError() if time_periods < 0: raise ValueError() -    spatial_res_05 = 0.5 * spatial_res +    x_is_lon = x_name == 'lon' or x_units == 'degrees_east' +    y_is_lat = y_name == 'lat' or y_units == 'degrees_north' -    lon_data = np.linspace(lon_start + spatial_res_05, lon_end - spatial_res_05, width) -    lon = xr.DataArray(lon_data, dims="lon") -    lon.attrs["long_name"] = "longitude" -    lon.attrs["units"] = "degrees_east" +    x_end = x_start + width * x_res +    y_end = y_start + height * y_res -    lat_data = np.linspace(lat_start + 0.5 * spatial_res, lat_end - 0.5 * spatial_res, height) -    lat = xr.DataArray(lat_data, dims="lat") -    if inverse_lat: -        lat = lat[::-1] -    lat.attrs["long_name"] = "latitude" -    lat.attrs["units"] = "degrees_north" - -    time_data_2 = pd.date_range(start=time_start, periods=time_periods + 1, freq=time_freq).values -    time_data_2 = time_data_2.astype(dtype=_TIME_DTYPE) -    time_delta = time_data_2[1] - time_data_2[0] -    time_data = time_data_2[0:-1] + time_delta // 2 -    time = xr.DataArray(time_data, dims="time") -    time.encoding["units"] = _TIME_UNITS -    time.encoding["calendar"] = _TIME_CALENDAR +    if x_is_lon and (x_start < -180. or x_end > 180.): +        raise ValueError() -    time_data_2 = pd.date_range(time_start, periods=time_periods + 1, freq=time_freq) +    if y_is_lat and (y_start < -90. or y_end > 90.): +        raise ValueError() -    coords = dict(lon=lon, lat=lat, time=time) +    x_res_05 = 0.5 * x_res +    y_res_05 = 0.5 * y_res + +    x_data = np.linspace(x_start + x_res_05, x_end - x_res_05, width, dtype=x_dtype) +    y_data = np.linspace(y_start + y_res_05, y_end - y_res_05, height, dtype=y_dtype) + +    x_var = xr.DataArray(x_data, dims=x_name, attrs=dict(units=x_units)) +    y_var = xr.DataArray(y_data, dims=y_name, attrs=dict(units=y_units)) +    if inverse_y: +        y_var = y_var[::-1] + +    if x_is_lon: +        x_var.attrs.update(long_name='longitude', +                           standard_name='longitude') +    else: +        x_var.attrs.update(long_name='x coordinate of projection', +                           standard_name='projection_x_coordinate') +    if y_is_lat: +        y_var.attrs.update(long_name='latitude', +                           standard_name='latitude') +    else: +        y_var.attrs.update(long_name='y coordinate of projection', +                           standard_name='projection_y_coordinate') + +    time_data_p1 = pd.date_range(start=time_start, periods=time_periods + 1, freq=time_freq).values +    time_data_p1 = time_data_p1.astype(dtype=time_dtype) + +    time_delta = time_data_p1[1] - time_data_p1[0] +    time_data = time_data_p1[0:-1] + time_delta // 2 +    time_var = xr.DataArray(time_data, dims=time_name) +    time_var.encoding['units'] = time_units +    time_var.encoding['calendar'] = time_calendar + +    coords = {x_name: x_var, y_name: y_var, time_name: time_var} if not drop_bounds: -        lon_bnds_data = np.zeros((width, 2), dtype=np.float64) -        lon_bnds_data[:, 0] = np.linspace(lon_start, lon_end - spatial_res, width) -        lon_bnds_data[:, 1] = np.linspace(lon_start + spatial_res, lon_end, width) -        lon_bnds = xr.DataArray(lon_bnds_data, dims=("lon", "bnds")) -        lon_bnds.attrs["units"] = "degrees_east" - -        lat_bnds_data = np.zeros((height, 2), dtype=np.float64) -        lat_bnds_data[:, 0] = np.linspace(lat_start, lat_end - spatial_res, height) -        lat_bnds_data[:, 1] = np.linspace(lat_start + spatial_res, lat_end, height) -        if inverse_lat: -            lat_bnds_data = lat_bnds_data[::-1, ::-1] -        lat_bnds = 
xr.DataArray(lat_bnds_data, dims=("lat", "bnds")) -        lat_bnds.attrs["units"] = "degrees_north" - -        time_bnds_data = np.zeros((time_periods, 2), dtype="datetime64[ns]") -        time_bnds_data[:, 0] = time_data_2[:-1] -        time_bnds_data[:, 1] = time_data_2[1:] -        time_bnds = xr.DataArray(time_bnds_data, dims=("time", "bnds")) -        time_bnds.encoding["units"] = _TIME_UNITS -        time_bnds.encoding["calendar"] = _TIME_CALENDAR - -        lon.attrs["bounds"] = "lon_bnds" -        lat.attrs["bounds"] = "lat_bnds" -        time.attrs["bounds"] = "time_bnds" - -        coords.update(dict(lon_bnds=lon_bnds, lat_bnds=lat_bnds, time_bnds=time_bnds)) - -    attrs = { -        "Conventions": "CF-1.7", -        "title": title, -        "time_coverage_start": str(time_data_2[0]), -        "time_coverage_end": str(time_data_2[-1]), -        "geospatial_lon_min": lon_start, -        "geospatial_lon_max": lon_end, -        "geospatial_lon_units": "degrees_east", -        "geospatial_lat_min": lat_start, -        "geospatial_lat_max": lat_end, -        "geospatial_lat_units": "degrees_north", -    } +        x_bnds_name = f'{x_name}_bnds' +        y_bnds_name = f'{y_name}_bnds' +        time_bnds_name = f'{time_name}_bnds' + +        bnds_dim = 'bnds' + +        x_bnds_data = np.zeros((width, 2), dtype=np.float64) +        x_bnds_data[:, 0] = np.linspace(x_start, x_end - x_res, width, dtype=x_dtype) +        x_bnds_data[:, 1] = np.linspace(x_start + x_res, x_end, width, dtype=x_dtype) +        y_bnds_data = np.zeros((height, 2), dtype=np.float64) +        y_bnds_data[:, 0] = np.linspace(y_start, y_end - y_res, height, dtype=y_dtype) +        y_bnds_data[:, 1] = np.linspace(y_start + y_res, y_end, height, dtype=y_dtype) +        if inverse_y: +            y_bnds_data = y_bnds_data[::-1, ::-1] + +        x_bnds_var = xr.DataArray(x_bnds_data, dims=(x_name, bnds_dim), attrs=dict(units=x_units)) +        y_bnds_var = xr.DataArray(y_bnds_data, dims=(y_name, bnds_dim), attrs=dict(units=y_units)) + +        x_var.attrs['bounds'] = x_bnds_name +        y_var.attrs['bounds'] = y_bnds_name + +        time_bnds_data = np.zeros((time_periods, 2), dtype=time_data_p1.dtype) +        time_bnds_data[:, 0] = time_data_p1[:-1] +        time_bnds_data[:, 
1] = time_data_p1[1:] + time_bnds_var = xr.DataArray(time_bnds_data, dims=(time_name, bnds_dim)) + time_bnds_var.encoding['units'] = time_units + time_bnds_var.encoding['calendar'] = time_calendar + + time_var.attrs['bounds'] = time_bnds_name + + coords.update({x_bnds_name: x_bnds_var, y_bnds_name: y_bnds_var, time_bnds_name: time_bnds_var}) + + attrs = dict(Conventions="CF-1.7", + title=title, + time_coverage_start=str(time_data_p1[0]), + time_coverage_end=str(time_data_p1[-1])) + + if x_is_lon: + attrs.update(dict(geospatial_lon_min=x_start, + geospatial_lon_max=x_end, + geospatial_lon_units=x_units)) + + if y_is_lat: + attrs.update(dict(geospatial_lat_min=y_start, + geospatial_lat_max=y_end, + geospatial_lat_units=y_units)) data_vars = {} if variables: - dims = ("time", "lat", "lon") + dims = (time_name, y_name, x_name) shape = (time_periods, height, width) size = time_periods * height * width for var_name, data in variables.items(): diff --git a/xcube/core/xarray.py b/xcube/core/xarray.py index 1da884fff..08abb4cc5 100644 --- a/xcube/core/xarray.py +++ b/xcube/core/xarray.py @@ -34,48 +34,13 @@ def __init__(self, dataset: xr.Dataset): self._dataset = dataset @classmethod - def new(cls, - title="Test Cube", - width=360, - height=180, - spatial_res=1.0, - lon_start=-180.0, - lat_start=-90.0, - time_periods=5, - time_freq="D", - time_start='2010-01-01T00:00:00', - inverse_lat=False, - drop_bounds=False, - variables=None) -> xr.Dataset: + def new(cls, **kwargs) -> xr.Dataset: """ Create a new empty cube. Useful for testing. - :param title: A title. 
- :param width: Horizontal number of grid cells - :param height: Vertical number of grid cells - :param spatial_res: Spatial resolution in degrees - :param lon_start: Minimum longitude value - :param lat_start: Minimum latitude value - :param time_periods: Number of time steps - :param time_freq: Duration of each time step - :param time_start: First time value - :param inverse_lat: Whether to create an inverse latitude axis - :param drop_bounds: If True, coordinate bounds variables are not created. - :param variables: Dictionary of data variables to be added. - :return: A cube instance + Refer to :func:`xcube.core.new.new_cube` for details. """ - return new_cube(title=title, - width=width, - height=height, - spatial_res=spatial_res, - lon_start=lon_start, - lat_start=lat_start, - time_periods=time_periods, - time_freq=time_freq, - time_start=time_start, - inverse_lat=inverse_lat, - drop_bounds=drop_bounds, - variables=variables) + return new_cube(**kwargs) @classmethod def open(cls, input_path: str, format_name: str = None, **kwargs) -> xr.Dataset: From 2afa02280292c7bff99cb7bce38ea7b922c2a800 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Mon, 11 Nov 2019 11:21:29 +0100 Subject: [PATCH 4/5] api doc update --- xcube/core/geom.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xcube/core/geom.py b/xcube/core/geom.py index a65e0da5a..188b7d8b9 100644 --- a/xcube/core/geom.py +++ b/xcube/core/geom.py @@ -64,6 +64,9 @@ def rasterize_features_into_dataset(dataset: xr.Dataset, If only variable name is given, the defaults are ``(name, dtype='float64', fill_value=float('nan'), attributes=None)``. + Currently, the coordinates of the geometries in the given *features* must use the same CRS as + the given *dataset*. + :param dataset: The xarray dataset. :param features: Sequence of GeoJSON features. :param feature_property_names: Sequence of names of numeric feature properties to be rasterized. 
From c736756a47830a82d4b42963d82d18ebb49752dc Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Mon, 11 Nov 2019 11:21:46 +0100 Subject: [PATCH 5/5] update --- CHANGES.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 123b004a2..1f13d5730 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -18,7 +18,8 @@ function that is applied to one or more input cubes. Replaces the formerly hidden `xcube apply` command. (#167) -* Added new function `xcube.core.geom.` to rasterize features (#222) +* Added new function `xcube.core.geom.rasterize_features_into_dataset()` + to rasterize features into a dataset. (#222) ### Enhancements