diff --git a/test/api/test_ts.py b/test/api/test_ts.py index 55c273cc5..b30f7048e 100644 --- a/test/api/test_ts.py +++ b/test/api/test_ts.py @@ -21,7 +21,7 @@ def test_polygon(self): [10.0, 20.0], [10.0, 10.0], [20.0, 10.0]]])) - self.assert_dataset_ok(ts_ds, 110, True, False, False, True, False, False) + self.assert_dataset_ok(ts_ds, 100, True, False, False, True, False, False) def test_polygon_incl_count_stdev(self): ts_ds = get_time_series(self.cube, @@ -33,7 +33,7 @@ def test_polygon_incl_count_stdev(self): [20.0, 10.0]]]), include_count=True, include_stdev=True) - self.assert_dataset_ok(ts_ds, 110, True, True, True, True, True, True) + self.assert_dataset_ok(ts_ds, 100, True, True, True, True, True, True) def test_polygon_incl_count(self): ts_ds = get_time_series(self.cube, @@ -44,7 +44,7 @@ def test_polygon_incl_count(self): [10.0, 10.0], [20.0, 10.0]]]), include_count=True) - self.assert_dataset_ok(ts_ds, 110, True, True, False, True, True, False) + self.assert_dataset_ok(ts_ds, 100, True, True, False, True, True, False) def test_polygon_incl_stdev_var_subs(self): ts_ds = get_time_series(self.cube, @@ -56,7 +56,7 @@ def test_polygon_incl_stdev_var_subs(self): [20.0, 10.0]]]), var_names=['B'], include_stdev=True) - self.assert_dataset_ok(ts_ds, 110, False, False, False, True, False, True) + self.assert_dataset_ok(ts_ds, 100, False, False, False, True, False, True) def test_polygon_using_groupby(self): ts_ds = get_time_series(self.cube, @@ -69,7 +69,7 @@ def test_polygon_using_groupby(self): include_count=True, include_stdev=True, use_groupby=True) - self.assert_dataset_ok(ts_ds, 110, True, True, True, True, True, True) + self.assert_dataset_ok(ts_ds, 100, True, True, True, True, True, True) def test_no_vars(self): ts_ds = get_time_series(self.cube, @@ -86,10 +86,10 @@ def setUp(self): shape = 25, 180, 360 dims = 'time', 'lat', 'lon' self.ts_a = np.linspace(1, 25, 25) - self.ts_a_count = np.array(25 * [110]) + self.ts_a_count = np.array(25 * [100]) self.ts_a_stdev = np.array(25 * [0.0]) self.ts_b = np.linspace(0, 1, 25) - self.ts_b_count = np.array(25 * [110]) + self.ts_b_count = np.array(25 * [100]) self.ts_b_stdev = np.array(25 * [0.0]) self.cube = new_cube(time_periods=25, variables=dict( diff --git a/test/util/test_geojson.py b/test/util/test_geojson.py index f83881bdc..79699177f 100644 --- a/test/util/test_geojson.py +++ b/test/util/test_geojson.py @@ -13,7 +13,6 @@ def test_is_geometry(self): self.assertTrue(GeoJSON.is_geometry(dict(type='Point', coordinates=[2.13, 42.2]))) self.assertTrue(GeoJSON.is_geometry(dict(type='Point', coordinates=None))) self.assertFalse(GeoJSON.is_geometry(dict(type='Point'))) - self.assertFalse(GeoJSON.is_geometry(dict(type='Point', coordinates='Argh!'))) self.assertTrue(GeoJSON.is_geometry(dict(type='GeometryCollection', geometries=None))) self.assertTrue(GeoJSON.is_geometry(dict(type='GeometryCollection', geometries=[]))) diff --git a/test/util/test_geom.py b/test/util/test_geom.py index 5e3317b2b..7072b2c47 100644 --- a/test/util/test_geom.py +++ b/test/util/test_geom.py @@ -4,7 +4,118 @@ import shapely.geometry import xarray as xr -from xcube.util.geom import get_dataset_geometry, get_dataset_bounds, get_geometry_mask, convert_geometry +from xcube.api import new_cube +from xcube.util.geom import get_dataset_geometry, get_dataset_bounds, get_geometry_mask, convert_geometry, \ + mask_dataset_by_geometry, clip_dataset_by_geometry + + +class DatasetGeometryTest(unittest.TestCase): + + def setUp(self) -> None: + width = 16 + height = 8 + spatial_res = 360 / width + lon_min = -2 * spatial_res + 0.5 * spatial_res + lat_min = -2 * spatial_res + 0.5 * spatial_res + lon_max = lon_min + 6 * spatial_res + lat_max = lat_min + 3 * spatial_res + + self.triangle = shapely.geometry.Polygon(((lon_min, lat_min), + (lon_max, lat_min), + (0.5 * (lon_max + lon_min), lat_max), + (lon_min, lat_min))) + + self.cube = new_cube(width=width, + height=height, + spatial_res=spatial_res, + drop_bounds=True, + variables=dict(temp=273.9, precip=0.9)) + + def test_clip_dataset_by_geometry(self): + cube = clip_dataset_by_geometry(self.cube, self.triangle) + self._assert_clipped_dataset_has_basic_props(cube) + cube = clip_dataset_by_geometry(self.cube, self.triangle, save_geometry=True) + self._assert_clipped_dataset_has_basic_props(cube) + self._assert_saved_geometry_wkt_is_fine(cube, 'geometry_wkt') + cube = clip_dataset_by_geometry(self.cube, self.triangle, save_geometry='intersect_geom') + self._assert_saved_geometry_wkt_is_fine(cube, 'intersect_geom') + + def test_mask_dataset_by_geometry(self): + cube = mask_dataset_by_geometry(self.cube, self.triangle) + self._assert_clipped_dataset_has_basic_props(cube) + cube = mask_dataset_by_geometry(self.cube, self.triangle, save_geometry_wkt=True) + self._assert_saved_geometry_wkt_is_fine(cube, 'geometry_wkt') + cube = mask_dataset_by_geometry(self.cube, self.triangle, save_geometry_wkt='intersect_geom') + self._assert_saved_geometry_wkt_is_fine(cube, 'intersect_geom') + + def test_mask_dataset_by_geometry_store_mask(self): + cube = mask_dataset_by_geometry(self.cube, self.triangle, save_geometry_mask='geom_mask') + self._assert_clipped_dataset_has_basic_props(cube) + self._assert_dataset_mask_is_fine(cube, 'geom_mask') + + def _assert_clipped_dataset_has_basic_props(self, dataset): + self.assertEqual({'time': 5, 'lat': 4, 'lon': 7}, dataset.dims) + self.assertIn('temp', dataset) + self.assertIn('precip', dataset) + temp = dataset['temp'] + precip = dataset['precip'] + self.assertEqual(('time', 'lat', 'lon'), temp.dims) + self.assertEqual((5, 4, 7), temp.shape) + self.assertEqual(('time', 'lat', 'lon'), precip.dims) + self.assertEqual((5, 4, 7), precip.shape) + + def _assert_dataset_mask_is_fine(self, dataset, mask_var_name): + self.assertIn(mask_var_name, dataset) + geom_mask = dataset[mask_var_name] + self.assertEqual(('lat', 'lon'), geom_mask.dims) + self.assertEqual((4, 7), geom_mask.shape) + np.testing.assert_array_almost_equal(geom_mask.values.astype(np.byte), + np.array([[0, 0, 0, 1, 0, 0, 0], + [0, 0, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 1, 1, 0], + [1, 1, 1, 1, 1, 1, 1]], dtype=np.byte)) + + def _assert_saved_geometry_wkt_is_fine(self, dataset, geometry_wkt_name): + self.assertIn(geometry_wkt_name, dataset.attrs) + self.assertEqual('POLYGON ((-33.75 -33.75, 33.75 33.75, 101.25 -33.75, -33.75 -33.75))', + dataset.attrs[geometry_wkt_name]) + + +class GetGeometryMaskTest(unittest.TestCase): + def test_get_geometry_mask(self): + w = 16 + h = 8 + res = 1.0 + lon_min = 0 + lat_min = 0 + lon_max = lon_min + w * res + lat_max = lat_min + h * res + + triangle = shapely.geometry.Polygon(((lon_min, lat_min), (lon_max, lat_min), (lon_max, lat_max), + (lon_min, lat_min))) + + actual_mask = get_geometry_mask(w, h, triangle, lon_min, lat_min, res) + expected_mask = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], dtype=np.byte) + np.testing.assert_array_almost_equal(expected_mask, actual_mask.astype('byte')) + + smaller_triangle = triangle.buffer(-1.5 * res) + actual_mask = get_geometry_mask(w, h, smaller_triangle, lon_min, lat_min, res) + expected_mask = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0], + [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.byte) + np.testing.assert_array_almost_equal(expected_mask, actual_mask) class GetDatasetGeometryTest(unittest.TestCase): @@ -75,43 +186,6 @@ def _get_antimeridian_datasets(): return ds1, ds2 -class GetGeometryMaskTest(unittest.TestCase): - def test_get_geometry_mask(self): - w = 16 - h = 8 - res = 1.0 - lon_min = 0 - lat_min = 0 - lon_max = lon_min + w * res - lat_max = lat_min + h * res - - triangle = shapely.geometry.Polygon(((lon_min, lat_min), (lon_max, lat_min), (lon_max, lat_max), - (lon_min, lat_min))) - - actual_mask = get_geometry_mask(w, h, triangle, lon_min, lat_min, res) - expected_mask = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], dtype=np.byte) - np.testing.assert_array_almost_equal(expected_mask, actual_mask.astype('byte')) - - smaller_triangle = triangle.buffer(-1.5 * res) - actual_mask = get_geometry_mask(w, h, smaller_triangle, lon_min, lat_min, res) - expected_mask = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0], - [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.byte) - np.testing.assert_array_almost_equal(expected_mask, actual_mask) - - class ConvertGeometryTest(unittest.TestCase): def test_convert_null(self): self.assertIs(None, convert_geometry(None)) @@ -123,26 +197,55 @@ def test_convert_to_point(self): self.assertEqual(expected_point, convert_geometry([12.8, -34.4])) self.assertEqual(expected_point, - convert_geometry('POINT (12.8 -34.4)')) + convert_geometry(np.array([12.8, -34.4]))) + self.assertEqual(expected_point, + convert_geometry(expected_point.wkt)) + self.assertEqual(expected_point, + convert_geometry(expected_point.__geo_interface__)) + + def test_convert_box_as_point(self): + expected_point = shapely.geometry.Point(12.8, -34.4) self.assertEqual(expected_point, - convert_geometry(dict(type='Point', coordinates=[12.8, -34.4]))) + convert_geometry([12.8, -34.4, 12.8, -34.4])) def test_convert_to_box(self): - expected_box = shapely.geometry.Polygon( - [(12.8, -34.4), (14.2, -34.4), (14.2, 20.6), (12.8, 20.6), (12.8, -34.4)]) + expected_box = shapely.geometry.box(12.8, -34.4, 14.2, 20.6) self.assertIs(expected_box, convert_geometry(expected_box)) self.assertEqual(expected_box, convert_geometry([12.8, -34.4, 14.2, 20.6])) self.assertEqual(expected_box, - convert_geometry('POLYGON ((12.8 -34.4, 14.2 -34.4, 14.2 20.6, 12.8 20.6, 12.8 -34.4))')) + convert_geometry(np.array([12.8, -34.4, 14.2, 20.6]))) + self.assertEqual(expected_box, + convert_geometry(expected_box.wkt)) self.assertEqual(expected_box, - convert_geometry(dict(type='Polygon', coordinates=[ - [[12.8, -34.4], [14.2, -34.4], [14.2, 20.6], [12.8, 20.6], [12.8, -34.4]] - ]))) + convert_geometry(expected_box.__geo_interface__)) + + def test_convert_to_split_box(self): + expected_split_box = shapely.geometry.MultiPolygon(polygons=[ + shapely.geometry.Polygon(((180.0, -34.4), (180.0, 20.6), (172.1, 20.6), (172.1, -34.4), + (180.0, -34.4))), + shapely.geometry.Polygon(((-165.7, -34.4), (-165.7, 20.6), (-180.0, 20.6), (-180.0, -34.4), + (-165.7, -34.4)))]) + actual_split_box = convert_geometry([172.1, -34.4, -165.7, 20.6]) + self.assertEqual(expected_split_box, + actual_split_box) + + def test_convert_invalid_box(self): + expected_msg = 'Invalid box coordinates' + + with self.assertRaises(ValueError) as cm: + convert_geometry([12.8, 20.6, 14.2, -34.4]) + self.assertEqual(expected_msg, f'{cm.exception}') + with self.assertRaises(ValueError) as cm: + convert_geometry([12.8, -34.4, 12.8, 20.6]) + self.assertEqual(expected_msg, f'{cm.exception}') + with self.assertRaises(ValueError) as cm: + convert_geometry([12.8, -34.4, 12.8, 20.6]) + self.assertEqual(expected_msg, f'{cm.exception}') def test_invalid(self): - expected_msg = ('geometry must be either a (shapely) geometry object, ' + expected_msg = ('Geometry must be either a (shapely) geometry object, ' 'a valid GeoJSON object, a valid WKT string, ' 'box coordinates (x1, y1, x2, y2), ' 'or point coordinates (x, y)') diff --git a/test/webapi/controllers/test_time_series.py b/test/webapi/controllers/test_time_series.py index 6c48c8b16..e9efb4679 100644 --- a/test/webapi/controllers/test_time_series.py +++ b/test/webapi/controllers/test_time_series.py @@ -9,6 +9,9 @@ class TimeSeriesControllerTest(unittest.TestCase): + def setUp(self) -> None: + self.maxDiff = None + def test_get_time_series_for_point_invalid_lat_and_lon(self): ctx = new_test_service_context() time_series = get_time_series_for_point(ctx, 'demo', 'conc_tsm', @@ -111,17 +114,17 @@ def test_get_time_series_for_geometry_polygon(self): [1., 51.], [2., 51.], [2., 52.], [1., 52.], [1., 51.] ]])) expected_dict = {'results': [{'date': '2017-01-16T10:09:22Z', - 'result': {'average': 56.0228561816751, + 'result': {'average': 56.12519223634024, 'totalCount': 1, - 'validCount': 122738}}, + 'validCount': 122392}}, {'date': '2017-01-25T09:35:51Z', 'result': {'average': None, 'totalCount': 1, 'validCount': 0}}, {'date': '2017-01-26T10:50:17Z', 'result': {'average': None, 'totalCount': 1, 'validCount': 0}}, {'date': '2017-01-28T09:58:11Z', - 'result': {'average': 49.71656646340396, + 'result': {'average': 49.70755256053988, 'totalCount': 1, - 'validCount': 132716}}, + 'validCount': 132066}}, {'date': '2017-01-30T10:46:34Z', 'result': {'average': None, 'totalCount': 1, 'validCount': 0}}]} @@ -134,9 +137,9 @@ def test_get_time_series_for_geometry_polygon_one_valid(self): [1., 51.], [2., 51.], [2., 52.], [1., 52.], [1., 51.] ]]), max_valids=1) expected_dict = {'results': [{'date': '2017-01-28T09:58:11Z', - 'result': {'average': 49.71656646340396, + 'result': {'average': 49.70755256053988, 'totalCount': 1, - 'validCount': 132716}}]} + 'validCount': 132066}}]} self.assertEqual(expected_dict, time_series) @@ -147,13 +150,13 @@ def test_get_time_series_for_geometry_polygon_only_valids(self): [1., 51.], [2., 51.], [2., 52.], [1., 52.], [1., 51.] ]]), max_valids=-1) expected_dict = {'results': [{'date': '2017-01-16T10:09:22Z', - 'result': {'average': 56.0228561816751, + 'result': {'average': 56.12519223634024, 'totalCount': 1, - 'validCount': 122738}}, + 'validCount': 122392}}, {'date': '2017-01-28T09:58:11Z', - 'result': {'average': 49.71656646340396, + 'result': {'average': 49.70755256053988, 'totalCount': 1, - 'validCount': 132716}}]} + 'validCount': 132066}}]} self.assertEqual(expected_dict, time_series) @@ -190,17 +193,17 @@ def test_get_time_series_for_geometries_incl_polygon(self): [1., 51.] ]])])) expected_dict = {'results': [[{'date': '2017-01-16T10:09:22Z', - 'result': {'average': 56.0228561816751, + 'result': {'average': 56.12519223634024, 'totalCount': 1, - 'validCount': 122738}}, + 'validCount': 122392}}, {'date': '2017-01-25T09:35:51Z', 'result': {'average': None, 'totalCount': 1, 'validCount': 0}}, {'date': '2017-01-26T10:50:17Z', 'result': {'average': None, 'totalCount': 1, 'validCount': 0}}, {'date': '2017-01-28T09:58:11Z', - 'result': {'average': 49.71656646340396, + 'result': {'average': 49.70755256053988, 'totalCount': 1, - 'validCount': 132716}}, + 'validCount': 132066}}, {'date': '2017-01-30T10:46:34Z', 'result': {'average': None, 'totalCount': 1, 'validCount': 0}}]]} diff --git a/xcube/api/__init__.py b/xcube/api/__init__.py index a71cb80c7..ff341a6d2 100644 --- a/xcube/api/__init__.py +++ b/xcube/api/__init__.py @@ -1,10 +1,8 @@ from .api import XCubeAPI from .dump import dump_dataset -from .extract import get_cube_values_for_points, get_cube_point_indexes, get_cube_values_for_indexes, \ - get_dataset_indexes, DEFAULT_INDEX_NAME_PATTERN, DEFAULT_REF_NAME_PATTERN, DEFAULT_INTERP_POINT_METHOD -from .gen.gen import gen_cube from .extract import DEFAULT_INDEX_NAME_PATTERN, DEFAULT_INTERP_POINT_METHOD, DEFAULT_REF_NAME_PATTERN, \ get_cube_point_indexes, get_cube_values_for_indexes, get_cube_values_for_points, get_dataset_indexes +from .gen.gen import gen_cube from .levels import compute_levels, read_levels, write_levels from .new import new_cube from .readwrite import open_cube, open_dataset, read_cube, read_dataset, write_dataset @@ -15,3 +13,5 @@ from .verify import assert_cube, verify_cube # noinspection PyUnresolvedReferences from ..util.chunk import chunk_dataset +# noinspection PyUnresolvedReferences +from ..util.geom import clip_dataset_by_geometry, mask_dataset_by_geometry, convert_geometry diff --git a/xcube/api/ts.py b/xcube/api/ts.py index b442454c6..0f204f3f2 100644 --- a/xcube/api/ts.py +++ b/xcube/api/ts.py @@ -28,13 +28,13 @@ from ..api.select import select_vars from ..api.verify import assert_cube -from ..util.geom import where_geometry, convert_geometry, Geometry, get_dataset_geometry +from ..util.geom import mask_dataset_by_geometry, convert_geometry, GeometryLike, get_dataset_geometry Date = Union[np.datetime64, str] def get_time_series(cube: xr.Dataset, - geometry: Geometry = None, + geometry: GeometryLike = None, var_names: Sequence[str] = None, start_date: Date = None, end_date: Date = None, @@ -94,7 +94,7 @@ def get_time_series(cube: xr.Dataset, return dataset.assign_attrs(max_number_of_observations=1) if geometry is not None: - dataset = where_geometry(dataset, geometry, mask_var_name='__mask__') + dataset = mask_dataset_by_geometry(dataset, geometry, save_geometry_mask='__mask__') if dataset is None: return None mask = dataset['__mask__'] diff --git a/xcube/util/geojson.py b/xcube/util/geojson.py index 15bfe2cf9..5d7971bd4 100644 --- a/xcube/util/geojson.py +++ b/xcube/util/geojson.py @@ -1,4 +1,27 @@ -from typing import Any, Optional, List, Dict +# The MIT License (MIT) +# Copyright (c) 2019 by the xcube development team and contributors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# of the Software, and to permit persons to whom the Software is furnished to do +# so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from typing import Any, Optional, Dict, Sequence + +from ..util.undefined import UNDEFINED class GeoJSON: @@ -16,43 +39,35 @@ def is_point(cls, obj: Any) -> bool: def is_geometry(cls, obj: Any) -> bool: type_name = cls.get_type_name(obj) if type_name in cls.PRIMITIVE_GEOMETRY_TYPES: - if 'coordinates' not in obj: - return False - coordinates = obj['coordinates'] - return coordinates is None or isinstance(coordinates, list) + return cls._is_valid_sequence(obj, 'coordinates') if type_name == cls.GEOMETRY_COLLECTION_TYPE: - if "geometries" not in obj: - return False - geometries = obj["geometries"] - return geometries is None or isinstance(geometries, list) + return cls._is_valid_sequence(obj, 'geometries') return False @classmethod - def get_geometry_collection_geometries(cls, obj: Any) -> Optional[List[Dict]]: + def get_geometry_collection_geometries(cls, obj: Any) -> Optional[Sequence[Dict]]: type_name = cls.get_type_name(obj) if type_name == cls.GEOMETRY_COLLECTION_TYPE: - if 'geometries' not in obj: + geometries = cls._get_sequence(obj, 'geometries') + if geometries is UNDEFINED: return None - geometries = obj['geometries'] - if geometries is None: + elif geometries is None: return [] - if isinstance(geometries, list): + else: return geometries - return None return None @classmethod - def get_feature_collection_features(cls, obj: Any) -> Optional[List[Dict]]: + def get_feature_collection_features(cls, obj: Any) -> Optional[Sequence[Dict]]: type_name = cls.get_type_name(obj) if type_name == cls.FEATURE_COLLECTION_TYPE: - if "features" not in obj: + features = cls._get_sequence(obj, 'features') + if features is UNDEFINED: return None - features = obj['features'] - if features is None: + elif features is None: return [] - if isinstance(features, list): + else: return features - return None return None @classmethod @@ -72,3 +87,21 @@ def get_type_name(cls, obj: Any) -> Optional[str]: if not isinstance(obj, dict) or "type" not in obj: return None return obj["type"] or None + + @classmethod + def _is_valid_sequence(cls, obj: Any, attr_name: str) -> bool: + return cls._get_sequence(obj, attr_name) is not UNDEFINED + + @classmethod + def _get_sequence(cls, obj: Any, attr_name: str) -> Optional[Sequence[Any]]: + if attr_name not in obj: + return UNDEFINED + sequence = obj[attr_name] + if sequence is None: + # GeoJSON sequence properties, 'coordinates', 'geometries', 'features', may be None according to spec + return None + try: + iter(sequence) + return sequence + except TypeError: + return UNDEFINED diff --git a/xcube/util/geom.py b/xcube/util/geom.py index b73a7c68e..4da0507c0 100644 --- a/xcube/util/geom.py +++ b/xcube/util/geom.py @@ -19,10 +19,10 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +import math from typing import Optional, Union, Dict, Tuple, Sequence, Any import affine -import math import numpy as np import rasterio.features import shapely.geometry @@ -30,57 +30,140 @@ import shapely.wkt import xarray as xr -from xcube.util.geojson import GeoJSON +from .geojson import GeoJSON -Geometry = Union[shapely.geometry.base.BaseGeometry, Dict[str, Any], str, Sequence[Union[float, int]]] +GeometryLike = Union[shapely.geometry.base.BaseGeometry, Dict[str, Any], str, Sequence[Union[float, int]]] Bounds = Tuple[float, float, float, float] SplitBounds = Tuple[Bounds, Optional[Bounds]] +_INVALID_GEOMETRY_MSG = ('Geometry must be either a (shapely) geometry object, ' + 'a valid GeoJSON object, a valid WKT string, ' + 'box coordinates (x1, y1, x2, y2), or point coordinates (x, y)') -def where_geometry(dataset: xr.Dataset, - geometry: Geometry, - mask_var_name: str = None) -> Optional[xr.Dataset]: +_INVALID_BOX_COORDS_MSG = 'Invalid box coordinates' + + +def mask_dataset_by_geometry(dataset: xr.Dataset, + geometry: GeometryLike, + no_clip: bool = False, + save_geometry_mask: Union[str, bool] = False, + save_geometry_wkt: Union[str, bool] = False) -> Optional[xr.Dataset]: + """ + Mask a dataset according to the given geometry. The cells of variables of the + returned dataset will have NaN-values where their spatial coordinates are not intersecting + the given geometry. + + :param dataset: The dataset + :param geometry: A geometry-like object, see py:function:`convert_geometry`. + :param no_clip: If True, the function will not clip the dataset before masking, this is, the + returned dataset will have the same dimension size as the given *dataset*. + :param save_geometry_mask: If value is a string, a variable named by *save_mask* representing the effective geometry mask + will be stored in the returned dataset. + :param save_mask: If the value is a string, the effective geometry mask array is stored as + a 2D data variable named by *save_mask*. + If the value is True, the name "geometry_mask" is used. + :param save_geometry_wkt: If the value is a string, the effective intersection geometry is stored as + a Geometry WKT string in the global attribute named by *save_geometry*. + If the value is True, the name "geometry_wkt" is used. + :return: The dataset spatial subset, or None if the bounding box of the dataset has a no or a zero area + intersection with the bounding box of the geometry. + """ geometry = convert_geometry(geometry) - ds_lon_min, ds_lat_min, ds_lon_max, ds_lat_max = get_dataset_bounds(dataset) - inv_y = float(dataset.lat[0]) < float(dataset.lat[-1]) - dataset_geometry = get_box_split_bounds_geometry(ds_lon_min, ds_lat_min, ds_lon_max, ds_lat_max) - # TODO: split geometry - split_geometry = geometry - actual_geometry = dataset_geometry.intersection(split_geometry) - if actual_geometry.is_empty: + + intersection_geometry = intersect_geometries(get_dataset_bounds(dataset), geometry) + if intersection_geometry is None: + return None + + if not no_clip: + dataset = _clip_dataset_by_geometry(dataset, intersection_geometry) + + ds_x_min, ds_y_min, ds_x_max, ds_y_max = get_dataset_bounds(dataset) + + width = dataset.dims['lon'] + height = dataset.dims['lat'] + spatial_res = (ds_x_max - ds_x_min) / width + + mask_data = get_geometry_mask(width, height, intersection_geometry, ds_x_min, ds_y_min, spatial_res) + mask = xr.DataArray(mask_data, + coords=dict(lat=dataset.lat, lon=dataset.lon), + dims=('lat', 'lon')) + + masked_vars = {} + for var_name in dataset.data_vars: + var = dataset[var_name] + masked_vars[var_name] = var.where(mask) + + masked_dataset = dataset.copy(data=masked_vars) + + _save_geometry_mask(masked_dataset, mask, save_geometry_mask) + _save_geometry_wkt(masked_dataset, intersection_geometry, save_geometry_wkt) + + return masked_dataset + + +def clip_dataset_by_geometry(dataset: xr.Dataset, + geometry: GeometryLike, + save_geometry: Union[str, bool] = False) -> Optional[xr.Dataset]: + """ + Spatially clip a dataset according to the bounding box of a given geometry. + + :param dataset: The dataset + :param geometry: A geometry-like object, see py:function:`convert_geometry`. + :param save_geometry: If the value is a string, the effective intersection geometry is stored as a Geometry WKT string + in the global attribute named by *save_geometry*. If the value is True, the name "geometry_wkt" is used. + :return: The dataset spatial subset, or None if the bounding box of the dataset has a no or a zero area + intersection with the bounding box of the geometry. + """ + intersection_geometry = intersect_geometries(get_dataset_bounds(dataset), geometry) + if intersection_geometry is None: return None + return _clip_dataset_by_geometry(dataset, intersection_geometry, save_geometry_wkt=save_geometry) + - width = len(dataset.lon) - height = len(dataset.lat) - res = (ds_lat_max - ds_lat_min) / height +def _clip_dataset_by_geometry(dataset: xr.Dataset, + intersection_geometry: shapely.geometry.base.BaseGeometry, + save_geometry_wkt: bool = False) -> Optional[xr.Dataset]: + # TODO (forman): the following code is wrong, if the dataset bounds cross the anti-meridian! - g_lon_min, g_lat_min, g_lon_max, g_lat_max = actual_geometry.bounds - x1 = _clamp(int(math.floor((g_lon_min - ds_lon_min) / res)), 0, width - 1) - x2 = _clamp(int(math.ceil((g_lon_max - ds_lon_min) / res)) + 1, 0, width - 1) - y1 = _clamp(int(math.floor((g_lat_min - ds_lat_min) / res)), 0, height - 1) - y2 = _clamp(int(math.ceil((g_lat_max - ds_lat_min) / res)) + 1, 0, height - 1) - if not inv_y: + ds_x_min, ds_y_min, ds_x_max, ds_y_max = get_dataset_bounds(dataset) + + width = dataset.lon.size + height = dataset.lat.size + res = (ds_y_max - ds_y_min) / height + + g_lon_min, g_lat_min, g_lon_max, g_lat_max = intersection_geometry.bounds + x1 = _clamp(int(math.floor((g_lon_min - ds_x_min) / res)), 0, width - 1) + x2 = _clamp(int(math.ceil((g_lon_max - ds_x_min) / res)), 0, width - 1) + y1 = _clamp(int(math.floor((g_lat_min - ds_y_min) / res)), 0, height - 1) + y2 = _clamp(int(math.ceil((g_lat_max - ds_y_min) / res)), 0, height - 1) + if not is_dataset_y_axis_inverted(dataset): _y1, _y2 = y1, y2 y1 = height - _y2 - 1 y2 = height - _y1 - 1 - ds_subset = dataset.isel(lon=slice(x1, x2), lat=slice(y1, y2)) - subset_ds_lon_min, subset_ds_lat_min, subset_ds_lon_max, subset_ds_lat_max = get_dataset_bounds(ds_subset) - subset_width = len(ds_subset.lon) - subset_height = len(ds_subset.lat) - mask_data = get_geometry_mask(subset_width, subset_height, actual_geometry, subset_ds_lon_min, subset_ds_lat_min, - res) - mask = xr.DataArray(mask_data, coords=dict(lat=ds_subset.lat, lon=ds_subset.lon), dims=('lat', 'lon')) + dataset_subset = dataset.isel(lon=slice(x1, x2), lat=slice(y1, y2)) + + # TODO (forman): Adjust global attrs + + _save_geometry_wkt(dataset_subset, intersection_geometry, save_geometry_wkt) + + return dataset_subset + - ds_subset_masked = xr.where(mask, ds_subset, np.nan) - if mask_var_name: - ds_subset_masked[mask_var_name] = mask +def _save_geometry_mask(dataset, mask, save_mask): + if save_mask: + var_name = save_mask if isinstance(save_mask, str) else 'geometry_mask' + dataset[var_name] = mask - return ds_subset_masked + +def _save_geometry_wkt(dataset, intersection_geometry, save_geometry): + if save_geometry: + attr_name = save_geometry if isinstance(save_geometry, str) else 'geometry_wkt' + dataset.attrs.update({attr_name: intersection_geometry.wkt}) def get_geometry_mask(width: int, height: int, - geometry: Geometry, + geometry: GeometryLike, lon_min: float, lat_min: float, res: float) -> np.ndarray: geometry = convert_geometry(geometry) # noinspection PyTypeChecker @@ -93,6 +176,90 @@ def get_geometry_mask(width: int, height: int, invert=True) +def intersect_geometries(geometry1: GeometryLike, geometry2: GeometryLike) \ + -> Optional[shapely.geometry.base.BaseGeometry]: + geometry1 = convert_geometry(geometry1) + if geometry1 is None: + return None + geometry2 = convert_geometry(geometry2) + if geometry2 is None: + return geometry1 + intersection_geometry = geometry1.intersection(geometry2) + if not intersection_geometry.is_valid or intersection_geometry.is_empty: + return None + return intersection_geometry + + +def convert_geometry(geometry: Optional[GeometryLike]) -> Optional[shapely.geometry.base.BaseGeometry]: + """ + Convert a geometry-like object into a shapely geometry object (``shapely.geometry.BaseGeometry``). + + A geometry-like object is may be any shapely geometry object, + * a dictionary that can be serialized to valid GeoJSON, + * a WKT string, + * a box given by a string of the form ",,," + or by a sequence of four numbers x1, y1, x2, y2, + * a point by a string of the form "," + or by a sequence of two numbers x, y. + + Handling of geometries crossing the antimeridian: + + * If box coordinates are given, it is allowed to pass x1, x2 where x1 > x2, + which is interpreted as a box crossing the antimeridian. In this case the function + splits the box along the antimeridian and returns a multi-polygon. + * In all other cases, 2D geometries are assumed to _not cross the antimeridian at all_. + + :param geometry: A geometry-like object + :return: Shapely geometry object or None. + """ + + if isinstance(geometry, shapely.geometry.base.BaseGeometry): + return geometry + + if isinstance(geometry, dict): + if not GeoJSON.is_geometry(geometry): + raise ValueError(_INVALID_GEOMETRY_MSG) + return shapely.geometry.shape(geometry) + + if isinstance(geometry, str): + return shapely.wkt.loads(geometry) + + if geometry is None: + return None + + invalid_box_coords = False + # noinspection PyBroadException + try: + x1, y1, x2, y2 = geometry + is_point = x1 == x2 and y1 == y2 + if is_point: + return shapely.geometry.Point(x1, y1) + invalid_box_coords = x1 == x2 or y1 >= y2 + if not invalid_box_coords: + return get_box_split_bounds_geometry(x1, y1, x2, y2) + except Exception: + # noinspection PyBroadException + try: + x, y = geometry + return shapely.geometry.Point(x, y) + except Exception: + pass + + if invalid_box_coords: + raise ValueError(_INVALID_BOX_COORDS_MSG) + raise ValueError(_INVALID_GEOMETRY_MSG) + + +def is_dataset_y_axis_inverted(dataset: Union[xr.Dataset, xr.DataArray]) -> bool: + if 'lat' in dataset.coords: + y = dataset.lat + elif 'y' in dataset.coords: + y = dataset.y + else: + raise ValueError("Neither 'lat' nor 'y' coordinate variable found.") + return float(y[0]) < float(y[-1]) + + def get_dataset_geometry(dataset: Union[xr.Dataset, xr.DataArray]) -> shapely.geometry.base.BaseGeometry: return get_box_split_bounds_geometry(*get_dataset_bounds(dataset)) @@ -151,42 +318,6 @@ def get_box_split_bounds_geometry(lon_min: float, lat_min: float, return shapely.geometry.box(*box_1) -_INVALID_GEOMETRY_MSG = ('geometry must be either a (shapely) geometry object, ' - 'a valid GeoJSON object, a valid WKT string, ' - 'box coordinates (x1, y1, x2, y2), or point coordinates (x, y)') - - -def convert_geometry(geometry: Optional[Geometry]) -> Optional[shapely.geometry.base.BaseGeometry]: - if isinstance(geometry, shapely.geometry.base.BaseGeometry): - return geometry - - if isinstance(geometry, dict): - if not GeoJSON.is_geometry(geometry): - raise ValueError(_INVALID_GEOMETRY_MSG) - return shapely.geometry.shape(geometry) - - if isinstance(geometry, str): - return shapely.wkt.loads(geometry) - - if geometry is None: - return None - - # noinspection PyBroadException - try: - x1, y1, x2, y2 = geometry - return shapely.geometry.shape(dict(type='Polygon', - coordinates=[[[x1, y1], [x2, y1], [x2, y2], [x1, y2], [x1, y1]]])) - except Exception: - # noinspection PyBroadException - try: - x, y = geometry - return shapely.geometry.shape(dict(type='Point', coordinates=[x, y])) - except Exception: - pass - - raise ValueError(_INVALID_GEOMETRY_MSG) - - def _clamp(x, x1, x2): if x < x1: return x1