From 7d540bb3654c76ab6596d1b7b33057c45c741411 Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Wed, 20 Apr 2022 12:46:13 +0200 Subject: [PATCH] EP-3981 Add #71 #114 references to vector cube todo's --- openeo_driver/ProcessGraphDeserializer.py | 25 ++++++++++++----------- openeo_driver/datacube.py | 6 +++--- openeo_driver/delayed_vector.py | 1 + openeo_driver/dry_run.py | 7 ++++--- openeo_driver/dummy/dummy_backend.py | 6 +++--- openeo_driver/save_result.py | 11 +++++----- openeo_driver/utils.py | 3 ++- 7 files changed, 31 insertions(+), 28 deletions(-) diff --git a/openeo_driver/ProcessGraphDeserializer.py b/openeo_driver/ProcessGraphDeserializer.py index 20e28426..e8a191d2 100644 --- a/openeo_driver/ProcessGraphDeserializer.py +++ b/openeo_driver/ProcessGraphDeserializer.py @@ -521,7 +521,7 @@ def vector_buffer(args: Dict, env: EvalEnv) -> dict: input_crs = 'epsg:4326' buffer_resolution = 3 - # TODO EP-3981 convert `geometry` to vector cube and move buffer logic to there + # TODO #114 EP-3981 convert `geometry` to vector cube and move buffer logic to there if isinstance(geometry, str): # TODO: assumption here that `geometry` is a path/url geoms = list(DelayedVector(geometry).geometries) @@ -664,7 +664,7 @@ def chunk_polygon(args: dict, env: EvalEnv) -> DriverDataCube: data_cube = extract_arg(args, 'data') # Chunks parameter check. - # TODO EP-3981 normalize first to vector cube and simplify logic + # TODO #114 EP-3981 normalize first to vector cube and simplify logic if isinstance(chunks, DelayedVector): polygons = list(chunks.geometries) for p in polygons: @@ -705,7 +705,7 @@ def fit_class_random_forest(args: dict, env: EvalEnv) -> DriverMlModel: predictors = extract_arg(args, 'predictors') if not isinstance(predictors, AggregatePolygonSpatialResult): - # TODO EP-3981 add support for real vector cubes. + # TODO #114 EP-3981 add support for real vector cubes. 
raise ProcessParameterInvalidException( parameter="predictors", process="fit_class_random_forest", reason=f"should be non-temporal vector-cube (got `{type(predictors)}`)." @@ -716,7 +716,7 @@ def fit_class_random_forest(args: dict, env: EvalEnv) -> DriverMlModel: and target.get("type") == "FeatureCollection" and isinstance(target.get("features"), list) ): - # TODO EP-3981 vector cube support + # TODO #114 EP-3981 vector cube support raise ProcessParameterInvalidException( parameter="target", process="fit_class_random_forest", reason='only GeoJSON FeatureCollection is currently supported.', @@ -949,7 +949,7 @@ def aggregate_spatial(args: dict, env: EvalEnv) -> DriverDataCube: target_dimension = args.get('target_dimension', None) geoms = extract_arg(args, 'geometries') - # TODO: convert all cases to DriverVectorCube first and just work with that + # TODO #114: convert all cases to DriverVectorCube first and just work with that if isinstance(geoms, DriverVectorCube): geoms = geoms elif isinstance(geoms, dict): @@ -993,11 +993,11 @@ def mask_polygon(args: dict, env: EvalEnv) -> DriverDataCube: replacement = args.get('replacement', None) inside = args.get('inside', False) - # TODO: instead of if-elif-else chain: generically "cast" to VectorCube first (e.g. for wide input + # TODO #114: instead of if-elif-else chain: generically "cast" to VectorCube first (e.g. for wide input # support: GeoJSON, WKT, ...) and then convert to MultiPolygon? if isinstance(mask, DelayedVector): # TODO: avoid reading DelayedVector twice due to dry-run? 
- # TODO EP-3981 embed DelayedVector in VectorCube implementation + # TODO #114 EP-3981 embed DelayedVector in VectorCube implementation polygon = shapely.ops.unary_union(list(mask.geometries)) elif isinstance(mask, DriverVectorCube): polygon = mask.to_multipolygon() @@ -1073,7 +1073,7 @@ def filter_spatial(args: Dict, env: EvalEnv) -> DriverDataCube: geometries = extract_arg(args, 'geometries') if not isinstance(geometries, dict): - # TODO: support DelayedVector + # TODO #114: support DriverVectorCube raise NotImplementedError("filter_spatial only supports dict but got {g!r}".format(g=geometries)) geometries = geojson_to_geometry(geometries) @@ -1173,6 +1173,7 @@ def run_udf(args: dict, env: EvalEnv): if dry_run_tracer and isinstance(data, AggregatePolygonResult): return JSONResult({}) + # TODO #114 add support for DriverVectorCube if isinstance(data, AggregatePolygonResult): pass if isinstance(data, (DelayedVector, dict)): @@ -1346,15 +1347,15 @@ def apply_process(process_id: str, args: dict, namespace: Union[str, None], env: .returns("TODO", schema={"type": "object", "subtype": "vector-cube"}) ) def read_vector(args: Dict, env: EvalEnv) -> DelayedVector: - # TODO EP-3981: deprecated in favor of load_uploaded_files/load_external? https://github.com/Open-EO/openeo-processes/issues/322 + # TODO #114 EP-3981: deprecated in favor of load_uploaded_files/load_external? https://github.com/Open-EO/openeo-processes/issues/322 path = extract_arg(args, 'filename') return DelayedVector(path) @process_registry_100.add_function(spec=read_spec("openeo-processes/1.x/proposals/load_uploaded_files.json")) def load_uploaded_files(args: dict, env: EvalEnv) -> DriverVectorCube: - # TODO EP-3981 process name is still under discussion https://github.com/Open-EO/openeo-processes/issues/322 - # TODO EP-3981 also other return types: raster data cube, array, ... 
+ # TODO #114 EP-3981 process name is still under discussion https://github.com/Open-EO/openeo-processes/issues/322 + # TODO also other return types: raster data cube, array, ... paths = extract_arg(args, 'paths', process_id="load_uploaded_files") format = extract_arg(args, 'format', process_id="load_uploaded_files") options = args.get("options", {}) @@ -1376,7 +1377,7 @@ def load_uploaded_files(args: dict, env: EvalEnv) -> DriverVectorCube: .returns("TODO", schema={"type": "object", "subtype": "vector-cube"}) ) def get_geometries(args: Dict, env: EvalEnv) -> Union[DelayedVector, dict]: - # TODO: standardize or deprecate this? EP-3981 https://github.com/Open-EO/openeo-processes/issues/322 + # TODO: standardize or deprecate this? #114 EP-3981 https://github.com/Open-EO/openeo-processes/issues/322 feature_collection = args.get('feature_collection', None) path = args.get('filename', None) if path is not None: diff --git a/openeo_driver/datacube.py b/openeo_driver/datacube.py index 0592e6ab..b6abe962 100644 --- a/openeo_driver/datacube.py +++ b/openeo_driver/datacube.py @@ -156,7 +156,7 @@ class DriverVectorCube: DIM_GEOMETRIES = "geometries" def __init__(self, geometries: gpd.GeoDataFrame, cube: Optional[xarray.DataArray] = None): - # TODO EP-3981: consider other data containers (xarray) and lazy loading? + # TODO #114 EP-3981: lazy loading (like DelayedVector)? 
if cube is not None: if cube.dims[0] != self.DIM_GEOMETRIES: log.error(f"First cube dim should be {self.DIM_GEOMETRIES!r} but got dims {cube.dims!r}") @@ -176,9 +176,9 @@ def with_cube(self, cube: xarray.DataArray) -> "DriverVectorCube": def from_fiona(cls, paths: List[str], driver: str, options: dict): """Factory to load vector cube data using fiona/GeoPandas.""" if len(paths) != 1: - # TODO EP-3981: support multiple paths + # TODO #114 EP-3981: support multiple paths raise FeatureUnsupportedException(message="Loading a vector cube from multiple files is not supported") - # TODO EP-3981: lazy loading like/with DelayedVector + # TODO #114 EP-3981: lazy loading like/with DelayedVector return cls(geometries=gpd.read_file(paths[0], driver=driver)) def _as_geopandas_df(self) -> gpd.GeoDataFrame: diff --git a/openeo_driver/delayed_vector.py b/openeo_driver/delayed_vector.py index 40db662d..a566fa7a 100644 --- a/openeo_driver/delayed_vector.py +++ b/openeo_driver/delayed_vector.py @@ -199,6 +199,7 @@ def _read_shapefile_crs(shp_path: str) -> pyproj.CRS: @staticmethod def _as_geometry_collection(feature_collection: Dict) -> Dict: + # TODO #71 #114 Deprecate/avoid usage of GeometryCollection geometries = (feature['geometry'] for feature in feature_collection['features']) return { diff --git a/openeo_driver/dry_run.py b/openeo_driver/dry_run.py index 461a61f6..60d25cfc 100644 --- a/openeo_driver/dry_run.py +++ b/openeo_driver/dry_run.py @@ -443,7 +443,7 @@ def aggregate_spatial( reducer: dict, target_dimension: str = "result", ) -> Union[AggregatePolygonResult, AggregatePolygonSpatialResult]: - # TODO EP-3981 normalize to vector cube instead of GeometryCollection + # TODO #71 #114 EP-3981 normalize to vector cube instead of GeometryCollection geometries, bbox = self._normalize_geometry(geometries) cube = self.filter_bbox(**bbox, operation="_weak_spatial_extent") cube._process(operation="aggregate_spatial", arguments={"geometries": geometries}) @@ -456,7 +456,7 @@ def 
_normalize_geometry(self, geometries) -> Tuple[Union[DriverVectorCube, Delay Helper to preprocess geometries (as used in aggregate_spatial and mask_polygon) and extract bbox (e.g. for filter_bbox) """ - # TODO EP-3981 normalize to vector cube instead of GeometryCollection + # TODO #71 #114 EP-3981 normalize to vector cube instead of GeometryCollection if isinstance(geometries, DriverVectorCube): bbox = geometries.get_bounding_box() elif isinstance(geometries, dict): @@ -478,7 +478,7 @@ def _normalize_geometry(self, geometries) -> Tuple[Union[DriverVectorCube, Delay bbox = dict(west=bbox[0], south=bbox[1], east=bbox[2], north=bbox[3], crs="EPSG:4326") return geometries, bbox - # TODO: this is a workaround until vectorcube is fully upgraded + # TODO: #114 this is a workaround until vectorcube is fully upgraded def raster_to_vector(self): return AggregatePolygonResult(timeseries={}, regions=None) @@ -504,6 +504,7 @@ def reduce_dimension(self, reducer, dimension: str, context: Any, env: EvalEnv) def chunk_polygon(self, reducer, chunks: MultiPolygon, mask_value: float, env: EvalEnv, context={}) -> 'DryRunDataCube': polygons: List[Polygon] = chunks.geoms + # TODO #71 #114 Deprecate/avoid usage of GeometryCollection geometries, bbox = self._normalize_geometry(GeometryCollection(polygons)) cube = self.filter_bbox(**bbox, operation="_weak_spatial_extent") return cube._process("chunk_polygon", arguments={"geometries": geometries}) diff --git a/openeo_driver/dummy/dummy_backend.py b/openeo_driver/dummy/dummy_backend.py index c06a667c..16ee22f4 100644 --- a/openeo_driver/dummy/dummy_backend.py +++ b/openeo_driver/dummy/dummy_backend.py @@ -219,7 +219,7 @@ def assert_polygon_sequence(geometries: Union[Sequence, BaseMultipartGeometry]): for g in geometries: assert isinstance(g, Polygon) or isinstance(g, MultiPolygon) - # TODO EP-3981 normalize to vector cube and preserve original properties + # TODO #114 EP-3981 normalize to vector cube and preserve original properties if 
isinstance(geometries, DriverVectorCube): # Build dummy aggregation data cube dims = (DriverVectorCube.DIM_GEOMETRIES,) @@ -239,7 +239,7 @@ def assert_polygon_sequence(geometries: Union[Sequence, BaseMultipartGeometry]): geometries = [geometry for geometry in DelayedVector(geometries).geometries] assert_polygon_sequence(geometries) elif isinstance(geometries, GeometryCollection): - # TODO EP-3981: GeometryCollection is deprecated + # TODO #71 #114 EP-3981: GeometryCollection is deprecated assert_polygon_sequence(geometries) elif isinstance(geometries, BaseGeometry): assert_polygon_sequence([geometries]) @@ -256,7 +256,7 @@ def assert_polygon_sequence(geometries: Union[Sequence, BaseMultipartGeometry]): class DummyAggregatePolygonSpatialResult(AggregatePolygonSpatialResult): - # TODO EP-3981 replace with proper VectorCube implementation + # TODO #114 EP-3981 replace with proper VectorCube implementation def __init__(self, cube: DummyDataCube, geometries: Iterable[BaseGeometry]): super().__init__(csv_dir="/dev/null", regions=geometries) diff --git a/openeo_driver/save_result.py b/openeo_driver/save_result.py index 20d53409..9f792e79 100644 --- a/openeo_driver/save_result.py +++ b/openeo_driver/save_result.py @@ -199,7 +199,7 @@ class AggregatePolygonResult(JSONResult): # TODO: if it supports NetCDF and CSV """ - # TODO EP-3981 port this to proper vector cube support + # TODO #71 #114 EP-3981 port this to proper vector cube support def __init__(self, timeseries: dict, regions: GeometryCollection, metadata:CollectionMetadata=None): super().__init__(data=timeseries) @@ -429,11 +429,10 @@ def to_covjson(self) -> dict: } - class AggregatePolygonResultCSV(AggregatePolygonResult): + # TODO #71 #114 EP-3981 port this to proper vector cube support - - def __init__(self, csv_dir, regions: GeometryCollection, metadata:CollectionMetadata=None): + def __init__(self, csv_dir, regions: GeometryCollection, metadata: CollectionMetadata = None): def _flatten_df(df): df.index = 
df.feature_index @@ -464,7 +463,7 @@ class AggregatePolygonSpatialResult(SaveResult): """ Container for result of `aggregate_polygon` process (aka "zonal stats") for a spatial layer. """ - # TODO EP-3981 replace with proper VectorCube implementation + # TODO #71 #114 EP-3981 replace with proper VectorCube implementation DEFAULT_FORMAT = "JSON" @@ -597,7 +596,7 @@ def to_save_result(data: Any, format: Optional[str] = None, options: Optional[di elif isinstance(data, DriverVectorCube): return VectorCubeResult(cube=data, format=format, options=options) elif isinstance(data, DelayedVector): - # TODO EP-3981 add vector cube support: keep features from feature collection + # TODO #114 EP-3981 add vector cube support: keep features from feature collection geojsons = [mapping(geometry) for geometry in data.geometries] return JSONResult(geojsons, format=format, options=options) elif isinstance(data, np.ndarray): diff --git a/openeo_driver/utils.py b/openeo_driver/utils.py index 1b181542..879589be 100644 --- a/openeo_driver/utils.py +++ b/openeo_driver/utils.py @@ -128,7 +128,7 @@ def smart_bool(value): def geojson_to_geometry(geojson: dict) -> shapely.geometry.base.BaseGeometry: """Convert GeoJSON object to shapely geometry object""" - # TODO EP-3981 standardize on using (FeatureCollection like) vector cubes instead of GeometryCollection? + # TODO #71 #114 EP-3981 standardize on using (FeatureCollection like) vector cubes instead of GeometryCollection? if geojson["type"] == "FeatureCollection": geojson = { 'type': 'GeometryCollection', @@ -147,6 +147,7 @@ def geojson_to_multipolygon( means dissolving overlapping polygons into one). """ # TODO: option to also force conversion of Polygon to MultiPolygon? + # TODO: #71 #114 migrate/centralize all this kind of logic to vector cubes if geojson["type"] == "Feature": geojson = geojson["geometry"]