From 12c89327c962d240f2f7972c40a1d490f762fbb3 Mon Sep 17 00:00:00 2001
From: Rose Pearson <rose.pearson@niwa.co.nz>
Date: Wed, 15 Jan 2025 11:19:42 +1300
Subject: [PATCH] various bug-fixes (#270)

* Fixed lake behaviour

* fixup: Format Python code with Black

* Optimisation to only use the KD tree if in the ROI

* Minor change to benchmark and updated instruction file for change to schema

* Ensure consistent behaviour between the first and second runs of waterways

* Fix to ensure tiles are filled with NaN (the no-data value) instead of random empty values - ensures consistent behaviour

* Add support for an ocean_points file - if provided, make the offshore fan to the offshore points in preference to contours

* Updated tests and committed optimisation to clipping

* Fixed traceback when no roads in a roughness domain

* Only reindex a patch if all x and y are also in the destination DEM and it is of the same resolution. Other minor code tidy-ups and corrections

* Updated version

* Updated code to ensure the expected DEM resolution is matched and updated tests

---------

Co-authored-by: github-actions
---
 pyproject.toml                                |   2 +-
 src/geofabrics/dem.py                         | 328 ++++++++++++------
 src/geofabrics/geometry.py                    | 201 +++++++----
 src/geofabrics/processor.py                   |  48 ++-
 src/geofabrics/version.py                     |   2 +-
 .../instruction.json                          |   2 +-
 .../data/benchmark.nc                         |   4 +-
 .../instruction.json                          |   2 +-
 .../data/benchmark.nc                         |   4 +-
 tests/test_many_stages_westport/test_case.py  |  25 +-
 .../test_stopbanks_waikanae/data/benchmark.nc |   4 +-
 11 files changed, 425 insertions(+), 197 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index c7440234..5920609e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "geofabrics"
-version = "1.1.24"
+version = "1.1.25"
 description = "A package for creating geofabrics for flood modelling."
 readme = "README.md"
 authors = [{ name = "Rose pearson", email = "rose.pearson@niwa.co.nz" }]
diff --git a/src/geofabrics/dem.py b/src/geofabrics/dem.py
index b3504ba1..f4a4b13f 100644
--- a/src/geofabrics/dem.py
+++ b/src/geofabrics/dem.py
@@ -315,6 +315,14 @@ def __init__(self, catchment_geometry: geometry.CatchmentGeometry, chunk_size: i
     def dem(self) -> xarray.Dataset:
         """Return the DEM over the catchment region"""
         raise NotImplementedError("dem must be instantiated in the child class")
+
+    def _check_resolution(self, dem: xarray.Dataset):
+        """Check the DEM resolution matches the specified resolution."""
+        dem_resolution = dem.rio.resolution()
+        dem_resolution = max(abs(dem_resolution[0]), abs(dem_resolution[1]))
+
+        return self.catchment_geometry.resolution == dem_resolution
+
 
     def _load_dem(self, filename: pathlib.Path) -> xarray.Dataset:
         """Load in and replace the DEM with a previously cached version."""
@@ -337,6 +345,9 @@ def _load_dem(self, filename: pathlib.Path) -> xarray.Dataset:
 
         if "zo" in dem.keys():
             dem["zo"] = dem.zo.astype(geometry.RASTER_TYPE)
+        if not self._check_resolution(dem):
+            raise ValueError("The specified resolution does not match the "
+                             f"{filename} resolution.")
         return dem
 
     def save_dem(
@@ -511,7 +522,7 @@ def _extents_from_mask(self, mask: numpy.ndarray, transform: dict):
 
         return dense_extents
 
-    def _chunks_from_dem(self, chunk_size, dem: xarray.Dataset) -> (list, list):
+    def _chunks_from_dem(self, chunk_size, dem: xarray.Dataset) -> tuple[list, list]:
         """Define the chunks to break the catchment into when reading in and
         downsampling LiDAR.
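The `_check_resolution` guard added above treats a DEM as matching when the larger of the absolute x/y pixel sizes reported by rioxarray equals the configured catchment resolution. A minimal standalone sketch of that comparison, assuming a rioxarray-backed dataset (the `resolution_matches` name and `expected` parameter are illustrative, not geofabrics API):

    import rioxarray  # noqa: F401 - registers the .rio accessor on xarray objects
    import xarray


    def resolution_matches(dem: xarray.Dataset, expected: float) -> bool:
        # rio.resolution() returns (x_res, y_res); y_res is usually negative
        # for north-up rasters, so compare the largest absolute pixel size.
        res_x, res_y = dem.rio.resolution()
        return expected == max(abs(res_x), abs(res_y))

As in the patch, the comparison is exact equality, so callers are expected to fail fast with a ValueError when a cached DEM or patch raster was generated at a different resolution.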
@@ -631,7 +642,10 @@ def __init__(
         self._write_netcdf_conventions_in_place(raw_dem, catchment_geometry.crs)
 
         # Clip to catchment and set the data_source layer to NaN where there is no data
-        raw_dem = raw_dem.rio.clip(catchment_geometry.catchment.geometry, drop=True)
+        raw_dem = raw_dem.rio.clip_box(*tuple(catchment_geometry.catchment.total_bounds))
+        raw_dem = raw_dem.where(
+            clip_mask(raw_dem.z, catchment_geometry.catchment.geometry, self.chunk_size)
+        )
         raw_dem["data_source"] = raw_dem.data_source.where(
             raw_dem.data_source != self.SOURCE_CLASSIFICATION["no data"],
             numpy.nan,
@@ -639,6 +653,10 @@
         # Rerun as otherwise the no data as NaN seems to be lost for the data_source layer
         self._write_netcdf_conventions_in_place(raw_dem, catchment_geometry.crs)
 
+        if not self._check_resolution(raw_dem):
+            raise ValueError("The specified resolution does not match the "
+                             f"{raw_dem_path} resolution.")
+
         # Set attributes
         self._raw_dem = raw_dem
         self.interpolation_method = interpolation_method
@@ -884,6 +902,23 @@ def interpolate_ocean_chunked(
             delayed_chunked_x = []
             for j, dim_x in enumerate(chunked_dim_x):
                 self.logger.debug(f"\tLiDAR chunk {[i, j]}")
+                # Check ROI to tile
+                chunk_region_to_tile = self._define_chunk_region(
+                    region_to_rasterise=region_to_rasterise,
+                    dim_x=dim_x,
+                    dim_y=dim_y,
+                    radius=0,
+                )
+                if chunk_region_to_tile.area.sum() == 0:
+                    self.logger.debug(f"\t\tReturning empty tile as out of ROI")
+                    delayed_chunked_x.append(
+                        dask.array.full(
+                            shape=(len(dim_y), len(dim_x)),
+                            fill_value=numpy.nan,
+                            dtype=raster_options["raster_type"],
+                        )
+                    )
+                    continue
                 # Load in points
                 chunk_offshore_points = delayed_load_tiles_in_chunk(
                     lidar_files=[offshore_file],
@@ -1141,16 +1176,12 @@ def add_points_within_polygon_chunked(
 
         if include_edges:
             # Get edge points - from DEM
-            edge_dem = self._dem.rio.clip(
-                region_to_rasterise.dissolve().buffer(
-                    self.catchment_geometry.resolution
-                ),
-                drop=True,
-            )
+            edge_roi = region_to_rasterise.dissolve().buffer(self.catchment_geometry.resolution)
+            edge_dem = self._dem.rio.clip_box(*tuple(edge_roi.total_bounds))
+            edge_dem = edge_dem.rio.clip(edge_roi)
             edge_dem = edge_dem.rio.clip(
                 region_to_rasterise.dissolve().geometry,
                 invert=True,
-                drop=True,
             )
             # Define the edge points
             grid_x, grid_y = numpy.meshgrid(edge_dem.x, edge_dem.y)
@@ -1203,6 +1234,24 @@
             delayed_chunked_x = []
             for j, dim_x in enumerate(chunked_dim_x):
                 self.logger.debug(f"\tLiDAR chunk {[i, j]}")
+                # Check ROI to tile
+                chunk_region_to_tile = self._define_chunk_region(
+                    region_to_rasterise=region_to_rasterise,
+                    dim_x=dim_x,
+                    dim_y=dim_y,
+                    radius=0,
+                )
+                if chunk_region_to_tile.area.sum() == 0:
+                    self.logger.debug(f"\t\tReturning empty tile as out of ROI")
+                    delayed_chunked_x.append(
+                        dask.array.full(
+                            shape=(len(dim_y), len(dim_x)),
+                            fill_value=numpy.nan,
+                            dtype=raster_options["raster_type"],
+                        )
+                    )
+                    continue
+
                 # Load in points
                 river_points = delayed_load_tiles_in_chunk(
                     lidar_files=[lidar_file],
@@ -1253,7 +1302,8 @@ def add_points_within_polygon_nearest_chunked(
         method: str,
         cache_path: pathlib.Path,
         label: str,
-        k_nearest_neighbours: int = 100,
+        k_nearest_neighbours: int,
+        include_edges: bool = True,
     ) -> xarray.Dataset:
         """Performs interpolation from estimated bathymetry points within a polygon
         using the specified interpolation approach after filtering the points based
@@ -1267,7 +1317,7 @@
             "method": method,
             "crs": crs,
             "k_nearest_neighbours": k_nearest_neighbours,
-            "use_edge": True,
"use_edge": True, + "use_edge": include_edges, "strict": False, } if method == "rbf": @@ -1289,93 +1339,98 @@ def add_points_within_polygon_nearest_chunked( pdal_pipeline = pdal.Pipeline(json.dumps(pdal_pipeline_instructions), [points]) pdal_pipeline.execute() - # Tempoarily save the adjacent points from the DEM - edge_dem = self._dem.rio.clip( - region_to_rasterise.dissolve().buffer(self.catchment_geometry.resolution), - drop=True, - ) - self._write_netcdf_conventions_in_place(edge_dem, self.catchment_geometry.crs) - edge_dem["z"] = edge_dem.z.rio.interpolate_na(method="nearest") - edge_dem = self._dem.rio.clip( - region_to_rasterise.dissolve().buffer(self.catchment_geometry.resolution), - drop=True, - ) - edge_dem = edge_dem.rio.clip( - region_to_rasterise.dissolve().geometry, - invert=True, - drop=True, - ) - - # Save provided points - grid_x, grid_y = numpy.meshgrid(edge_dem.x, edge_dem.y) - flat_x = grid_x.flatten() - flat_y = grid_y.flatten() - flat_z = edge_dem.z.values.flatten() - mask_z = ~numpy.isnan(flat_z) - - # Interpolate the estimated bank heights around the polygon if they exist - if elevations.bank_heights_exist(): - # Get the estimated bank heights and define a mask where nan - bank_points = elevations.bank_height_points() - bank_nan_mask = numpy.logical_not(numpy.isnan(bank_points["Z"])) - # Interpolate from the estimated bank heights - xy_out = numpy.concatenate( - [[flat_x[mask_z]], [flat_y[mask_z]]], axis=0 - ).transpose() - options = { - "radius": elevations.points["width"].max(), - "raster_type": geometry.RASTER_TYPE, - "method": "linear", - "strict": False, - } - estimated_edge_z = elevation_from_points( - point_cloud=bank_points[bank_nan_mask], - xy_out=xy_out, - options=options, + # Tempoarily save the adjacent points from the DEM - ensure no NaN through NN interpolation + if include_edges: + edge_roi = region_to_rasterise.dissolve().buffer( + self.catchment_geometry.resolution + ) + edge_dem = self._dem.rio.clip_box(*tuple(edge_roi.total_bounds)) + #edge_dem = edge_dem.rio.clip(edge_roi) + self._write_netcdf_conventions_in_place( + edge_dem, self.catchment_geometry.crs + ) + edge_dem["z"] = edge_dem.z.rio.interpolate_na(method="nearest") + edge_dem = edge_dem.rio.clip(edge_roi, drop=True, ) + edge_dem = edge_dem.rio.clip( + region_to_rasterise.dissolve().geometry, + invert=True, + drop=True, ) - # Use the estimated bank heights where lower than the DEM edge values - mask_z_edge = mask_z.copy() - mask_z_edge[:] = False - mask_z_edge[mask_z] = flat_z[mask_z] > estimated_edge_z - flat_z[mask_z_edge] = estimated_edge_z[flat_z[mask_z] > estimated_edge_z] + # Save provided points + grid_x, grid_y = numpy.meshgrid(edge_dem.x, edge_dem.y) + flat_x = grid_x.flatten() + flat_y = grid_y.flatten() + flat_z = edge_dem.z.values.flatten() + mask_z = ~numpy.isnan(flat_z) + + # Interpolate the estimated bank heights around the polygon if they exist + if elevations.bank_heights_exist(): + # Get the estimated bank heights and define a mask where nan + bank_points = elevations.bank_height_points() + bank_nan_mask = numpy.logical_not(numpy.isnan(bank_points["Z"])) + # Interpolate from the estimated bank heights + xy_out = numpy.concatenate( + [[flat_x[mask_z]], [flat_y[mask_z]]], axis=0 + ).transpose() + options = { + "radius": elevations.points["width"].max(), + "raster_type": geometry.RASTER_TYPE, + "method": "linear", + "strict": False, + } + estimated_edge_z = elevation_from_points( + point_cloud=bank_points[bank_nan_mask], + xy_out=xy_out, + options=options, + ) - # Use the 
flat_x/y/z to define edge points and heights - edge_points = numpy.empty( - [mask_z.sum().sum()], - dtype=[ - ("X", geometry.RASTER_TYPE), - ("Y", geometry.RASTER_TYPE), - ("Z", geometry.RASTER_TYPE), - ], - ) - edge_points["X"] = flat_x[mask_z] - edge_points["Y"] = flat_y[mask_z] - edge_points["Z"] = flat_z[mask_z] + # Use the estimated bank heights where lower than the DEM edge values + mask_z_edge = mask_z.copy() + mask_z_edge[:] = False + mask_z_edge[mask_z] = flat_z[mask_z] > estimated_edge_z + flat_z[mask_z_edge] = estimated_edge_z[ + flat_z[mask_z] > estimated_edge_z + ] - edge_file = cache_path / f"{label}_edge_points.laz" - pdal_pipeline_instructions = [ - { - "type": "writers.las", - "a_srs": f"EPSG:" f"{crs['horizontal']}+" f"{crs['vertical']}", - "filename": str(edge_file), - "compression": "laszip", - } - ] - pdal_pipeline = pdal.Pipeline( - json.dumps(pdal_pipeline_instructions), [edge_points] - ) - pdal_pipeline.execute() + # Use the flat_x/y/z to define edge points and heights + edge_points = numpy.empty( + [mask_z.sum().sum()], + dtype=[ + ("X", geometry.RASTER_TYPE), + ("Y", geometry.RASTER_TYPE), + ("Z", geometry.RASTER_TYPE), + ], + ) + edge_points["X"] = flat_x[mask_z] + edge_points["Y"] = flat_y[mask_z] + edge_points["Z"] = flat_z[mask_z] - if ( - len(points) < raster_options["k_nearest_neighbours"] - or len(edge_points) < raster_options["k_nearest_neighbours"] + edge_file = cache_path / f"{label}_edge_points.laz" + pdal_pipeline_instructions = [ + { + "type": "writers.las", + "a_srs": f"EPSG:" f"{crs['horizontal']}+" f"{crs['vertical']}", + "filename": str(edge_file), + "compression": "laszip", + } + ] + pdal_pipeline = pdal.Pipeline( + json.dumps(pdal_pipeline_instructions), [edge_points] + ) + pdal_pipeline.execute() + + if len(points) < raster_options["k_nearest_neighbours"] or ( + include_edges and len(edge_points) < raster_options["k_nearest_neighbours"] ): + k_nearest_neighbours = ( + min(len(points), len(edge_points)) if include_edges else len(points) + ) logging.info( f"Fewer points or edge points than the default expected {raster_options['k_nearest_neighbours']}. " - f"Updating k_nearest_neighbours to {min(len(points), len(edge_points))}." + f"Updating k_nearest_neighbours to {k_nearest_neighbours}." ) - raster_options["k_nearest_neighbours"] = min(len(points), len(edge_points)) + raster_options["k_nearest_neighbours"] = k_nearest_neighbours if raster_options["k_nearest_neighbours"] < 3: logging.warning( f"Not enough points or edge points to meaningfully include {raster_options['k_nearest_neighbours']}. 
" @@ -1402,6 +1457,24 @@ def add_points_within_polygon_nearest_chunked( delayed_chunked_x = [] for j, dim_x in enumerate(chunked_dim_x): self.logger.debug(f"\tLiDAR chunk {[i, j]}") + # Check ROI to tile + chunk_region_to_tile = self._define_chunk_region( + region_to_rasterise=region_to_rasterise, + dim_x=dim_x, + dim_y=dim_y, + radius=0, + ) + if chunk_region_to_tile.area.sum() == 0: + self.logger.debug(f"\t\tReturning empty tile as out of RIO") + delayed_chunked_x.append( + dask.array.full( + shape=(len(dim_y), len(dim_x)), + fill_value=numpy.nan, + dtype=raster_options["raster_type"], + ) + ) + continue + # Load in points points = delayed_load_tiles_in_chunk( lidar_files=[points_file], @@ -1409,13 +1482,15 @@ def add_points_within_polygon_nearest_chunked( chunk_region_to_tile=None, crs=raster_options["crs"], ) - - edge_points = delayed_load_tiles_in_chunk( - lidar_files=[edge_file], - source_crs=raster_options["crs"], - chunk_region_to_tile=None, - crs=raster_options["crs"], - ) + if include_edges: + edge_points = delayed_load_tiles_in_chunk( + lidar_files=[edge_file], + source_crs=raster_options["crs"], + chunk_region_to_tile=None, + crs=raster_options["crs"], + ) + else: + edge_points = None # Rasterise tiles delayed_chunked_x.append( @@ -1704,7 +1779,7 @@ def __init__( self.buffer_cells = buffer_cells self._dem = None - def _set_up_chunks(self) -> (list, list): + def _set_up_chunks(self) -> tuple[list, list]: """Define the chunks to break the catchment into when reading in and downsampling LiDAR. """ @@ -1856,7 +1931,7 @@ def clip_lidar( # Clip DEM to Catchment and ensure NaN outside region to rasterise catchment = self.catchment_geometry.catchment - self._dem = self._dem.rio.clip_box(**catchment.bounds.iloc[0]) + self._dem = self._dem.rio.clip_box(*tuple(catchment.total_bounds)) self._dem = self._dem.where( clip_mask(self._dem.z, catchment.geometry, self.chunk_size) ) @@ -1969,6 +2044,22 @@ def _add_tiled_lidar_chunked( chunk_region_to_tile=chunk_region_to_tile, lidar_files_map=lidar_files_map, ) + + # Return empty if no files + if len(chunk_lidar_files) == 0: + self.logger.debug( + f"\t\tReturning empty tile as no LiDAR or out of ROI" + ) + delayed_chunked_x.append( + dask.array.full( + shape=(len(dim_y), len(dim_x)), + fill_value=numpy.nan, + dtype=raster_options["raster_type"], + ) + ) + continue + + # Get point cloud chunk_points = delayed_load_tiles_in_chunk( lidar_files=chunk_lidar_files, source_crs=source_crs, @@ -2381,6 +2472,9 @@ def __init__( "band", drop=True ) # remove band coordinate added by rasterio.open() self._write_netcdf_conventions_in_place(initial_dem, catchment_geometry.crs) + if not self._check_resolution(initial_dem): + raise ValueError("The specified resolution does not match the " + f"{initial_dem_path} resolution.") self._dem = initial_dem def add_patch(self, patch_path: pathlib.Path, label: str, layer: str): @@ -2410,19 +2504,21 @@ def add_patch(self, patch_path: pathlib.Path, label: str, layer: str): patch = rioxarray.rioxarray.open_rasterio( patch_path, masked=True, + chunks=True, ).squeeze("band", drop=True) patch.rio.write_crs(self.catchment_geometry.crs["horizontal"], inplace=True) patch_resolution = patch.rio.resolution() patch_resolution = max(abs(patch_resolution[0]), abs(patch_resolution[1])) + # Define region to patch within if self.drop_patch_offshore: roi = self.catchment_geometry.land_and_foreshore else: roi = self.catchment_geometry.catchment try: # Use try catch as otherwise crash if patch does not overlap roi - patch = patch.rio.clip( - 
roi.buffer(patch_resolution).geometry, - drop=True, + patch = patch.rio.clip_box(*tuple(roi.buffer(patch_resolution).total_bounds)) + patch = patch.where( + clip_mask(patch, roi.buffer(patch_resolution).geometry, self.chunk_size) ) except ( # If exception skip and proceed to the next patch rioxarray.exceptions.NoDataInBounds, @@ -2450,14 +2546,14 @@ def add_patch(self, patch_path: pathlib.Path, label: str, layer: str): self.logger.info(f"\t\tAdd data from coarse DEM: {patch_path.name}") # Check if same resolution - if all(patch.x.isin(self._dem.x)) and all(patch.y.isin(self._dem.y)): - # Do a stright replacement if grid aligned + if all(patch.x.isin(self._dem.x)) and all(patch.y.isin(self._dem.y)) and patch_resolution == self.catchment_geometry.resolution: + self.logger.info(f"\t\t\tGrid aligned so do a straight reindex") patch = patch.reindex_like(self._dem, fill_value=numpy.nan) elif ( self.chunk_size is not None and max(len(self._dem.x), len(self._dem.y)) > self.chunk_size - ): # Ensure parallelisation if chunks specified - # Expect xarray dims (y, x), not (x, y) as default for rioxarray + ): # Expect xarray dims (y, x), not (x, y) as default for rioxarray + self.logger.info(f"\t\t\tInterpolate with dask parallelisation at chunk size") interpolator = scipy.interpolate.RegularGridInterpolator( (patch.y.values, patch.x.values), patch.values, @@ -2483,6 +2579,7 @@ def dask_interpolation(y, x): ) patch.rio.write_transform(inplace=True) else: # No chunking use built in method + self.logger.info(f"\t\t\tInterpolate using built-in method") patch = patch.interp(x=self._dem.x, y=self._dem.y, method="linear") patch.rio.write_crs(self.catchment_geometry.crs["horizontal"], inplace=True) patch.rio.write_nodata(numpy.nan, encoded=True, inplace=True) @@ -2625,7 +2722,9 @@ def __init__( self._write_netcdf_conventions_in_place( hydrological_dem, catchment_geometry.crs ) - + if not self._check_resolution(hydrological_dem): + raise ValueError("The specified resolution does not match the " + f"{hydrological_dem_path} resolution.") # Ensure the resolution of the hydrological DEM matches the input DEM assert ( abs(float(hydrological_dem.x[1] - hydrological_dem.x[0])) @@ -2637,7 +2736,7 @@ def __init__( # Clip to the catchment extents to ensure performance catchment = self.catchment_geometry.catchment - hydrological_dem = hydrological_dem.rio.clip_box(**catchment.bounds.iloc[0]) + hydrological_dem = hydrological_dem.rio.clip_box(*tuple(catchment.total_bounds)) mask = clip_mask(hydrological_dem.z, catchment.geometry, self.chunk_size) hydrological_dem = hydrological_dem.where(mask) # Rerun as otherwise the no data as NaN seems to be lost for the data_source layer @@ -2861,6 +2960,21 @@ def _add_tiled_lidar_chunked( chunk_region_to_tile=chunk_region_to_tile, lidar_files_map=lidar_files_map, ) + + # Return empty if no files + if len(chunk_lidar_files) == 0: + self.logger.debug( + f"\t\tReturning empty tile as no LiDAR or out of ROI" + ) + delayed_chunked_x.append( + dask.array.full( + shape=(len(dim_y), len(dim_x)), + fill_value=numpy.nan, + dtype=raster_options["raster_type"], + ) + ) + continue + chunk_points = delayed_load_tiles_in_chunk( lidar_files=chunk_lidar_files, source_crs=source_crs, diff --git a/src/geofabrics/geometry.py b/src/geofabrics/geometry.py index 005f870f..1de8e7fb 100644 --- a/src/geofabrics/geometry.py +++ b/src/geofabrics/geometry.py @@ -871,6 +871,7 @@ def __init__( river_bathymetry_file: str, river_polygon_file: str, ocean_contour_file: str, + ocean_points_file: str, crs: int, 
cross_section_spacing: float,
         elevation_labels: list,
@@ -885,6 +886,7 @@
         self.ocean_contour_file = ocean_contour_file
         self.ocean_contour_depth_label = ocean_contour_depth_label
         self.elevation_labels = elevation_labels
+        self.ocean_points_file = ocean_points_file
 
     def _get_mouth_alignment(self):
         """Get the location and alignment of the river mouth."""
@@ -966,6 +968,41 @@
             ), f"No contours exist with a depth {depth_multiplier}x the river mouth depth. "
 
         return ocean_contours
+
+    def _get_distance_to_nearest_ocean_point_in_fan(
+        self,
+        river_mouth_z,
+        fan,
+        mouth_point,
+        depth_multiplier,
+    ) -> tuple[float, float]:
+        """Load in the ocean points and get the nearest one to the river mouth
+        that is deeper than the river.
+
+        Parameters:
+
+            river_mouth_z       The elevation in m of the river mouth
+            depth_multiplier    Number of times deeper the end point should be
+                                than the river mouth
+        """
+
+        # Load in the ocean points and find the points to terminate against
+        ocean_points = geopandas.read_file(self.ocean_points_file).to_crs(self.crs)
+
+        # Determine the end depth and filter the points to keep only those deep enough
+        ocean_points = ocean_points.clip(fan).reset_index(drop=True)
+        if river_mouth_z > 0:
+            river_mouth_z = 0
+        ocean_points = ocean_points[
+            ocean_points["Z"] < depth_multiplier * river_mouth_z
+        ].reset_index(drop=True)
+
+        if len(ocean_points) == 0:
+            raise ValueError(f"No points exist within the fan at a depth {depth_multiplier}x the river mouth depth.")
+
+        distance = ocean_points.distance(mouth_point).min()
+        elevation = ocean_points.iloc[ocean_points.distance(mouth_point).idxmin()]["Z"]
+
+        return distance, elevation
 
     def _bathymetry(
         self,
@@ -1030,12 +1067,13 @@
         fan_depths = geopandas.GeoDataFrame(fan_depths, crs=self.crs)
         return fan_depths
 
-    def _max_length_polygon(
+    def _fixed_length_fan_polygon(
         self,
         river_mouth_width: float,
         mouth_point: float,
         mouth_tangent: float,
         mouth_normal: float,
+        length: float,
     ):
         """Return the fan polygon of maximum length. 
This will be used to produce data to the first contour at least 2x the depth of the river @@ -1048,13 +1086,13 @@ def _max_length_polygon( mouth_normal The normal to the river mouth (cross channel axis) """ - end_width = river_mouth_width + 2 * self.FAN_MAX_LENGTH * numpy.tan( + end_width = river_mouth_width + 2 * length * numpy.tan( numpy.pi / 180 * self.FAN_ANGLE / 2 ) fan_end_point = shapely.geometry.Point( [ - mouth_point.x + self.FAN_MAX_LENGTH * mouth_tangent.x, - mouth_point.y + self.FAN_MAX_LENGTH * mouth_tangent.y, + mouth_point.x + length * mouth_tangent.x, + mouth_point.y + length * mouth_tangent.y, ] ) fan_polygon = shapely.geometry.Polygon( @@ -1092,11 +1130,12 @@ def polygon_and_bathymetry(self): ) = self._get_mouth_bathymetry() # Create maximum fan polygon - fan_polygon = self._max_length_polygon( + fan_polygon = self._fixed_length_fan_polygon( river_mouth_width=river_mouth_width, mouth_point=mouth_point, mouth_tangent=mouth_tangent, mouth_normal=mouth_normal, + length=self.FAN_MAX_LENGTH, ) # Define fan centre line @@ -1110,76 +1149,114 @@ def polygon_and_bathymetry(self): ] ) - # Load in ocean depth contours and keep only those intersecting the fan centre - ocean_contours = self._get_ocean_contours(max(river_mouth_elevations)) - end_depth = ocean_contours[self.ocean_contour_depth_label].min() - ocean_contours = ocean_contours.explode(ignore_index=True) - ocean_contours = ocean_contours[ - ~ocean_contours.intersection(fan_centre).is_empty - ] - - # Keep the closest contour intersecting - if len(ocean_contours) > 0: - ocean_contours = ocean_contours.clip(fan_polygon).explode(ignore_index=True) - min_index = ocean_contours.distance(mouth_point).idxmin() - intersection_line = ocean_contours.loc[min_index].geometry - end_depth = ocean_contours.loc[min_index][self.ocean_contour_depth_label] - else: - self.logger.warning( - "No ocean contour intersected. 
Instaed assumed fan geoemtry" + if self.ocean_points_file is not None: + length, end_depth = self._get_distance_to_nearest_ocean_point_in_fan( + river_mouth_z=max(river_mouth_elevations), + fan=fan_polygon, + mouth_point=mouth_point, + depth_multiplier=2 ) - intersection_line = shapely.geometry.linestring( + fan_polygon = self._fixed_length_fan_polygon( + river_mouth_width=river_mouth_width, + mouth_point=mouth_point, + mouth_tangent=mouth_tangent, + mouth_normal=mouth_normal, + length=length, + ) + end_width = river_mouth_width + 2 * length * numpy.tan( + numpy.pi / 180 * self.FAN_ANGLE / 2 + ) + fan_end_point = shapely.geometry.Point( [ - [fan_polygon.exterior.xy[0][2], fan_polygon.exterior.xy[1][2]], - [fan_polygon.exterior.xy[0][3], fan_polygon.exterior.xy[1][3]], + mouth_point.x + length * mouth_tangent.x, + mouth_point.y + length * mouth_tangent.y, ] ) - - # Construct a fan ending at the contour - (x, y) = intersection_line.xy - polygon_points = [[xi, yi] for (xi, yi) in zip(x, y)] - - # Check if the intersected contour and mouth normal are roughtly parallel or - # anti-parallel - unit_vector_contour = numpy.array([x[-1] - x[0], y[-1] - y[0]]) - unit_vector_contour = unit_vector_contour / numpy.linalg.norm( - unit_vector_contour - ) - unit_vector_mouth = numpy.array([mouth_normal.x, mouth_normal.y]) - - # they have the oposite direction - if ( - numpy.arccos(numpy.dot(unit_vector_contour, unit_vector_mouth)) - > numpy.pi / 2 - ): - # keep line order - polygon_points.extend( + intersection_line = shapely.geometry.LineString( [ [ - mouth_point.x - mouth_normal.x * river_mouth_width / 2, - mouth_point.y - mouth_normal.y * river_mouth_width / 2, + fan_end_point.x + mouth_normal.x * end_width / 2, + fan_end_point.y + mouth_normal.y * end_width / 2, ], [ - mouth_point.x + mouth_normal.x * river_mouth_width / 2, - mouth_point.y + mouth_normal.y * river_mouth_width / 2, + fan_end_point.x - mouth_normal.x * end_width / 2, + fan_end_point.y - mouth_normal.y * end_width / 2, ], ] ) - else: # The have the same direction, so reverse - # reverse fan order - polygon_points.extend( - [ - [ - mouth_point.x + mouth_normal.x * river_mouth_width / 2, - mouth_point.y + mouth_normal.y * river_mouth_width / 2, - ], + + else: + # Load in ocean depth contours and keep only those intersecting the fan centre + ocean_contours = self._get_ocean_contours(max(river_mouth_elevations)) + end_depth = ocean_contours[self.ocean_contour_depth_label].min() + ocean_contours = ocean_contours.explode(ignore_index=True) + ocean_contours = ocean_contours[ + ~ocean_contours.intersection(fan_centre).is_empty + ] + + # Keep the closest contour intersecting + if len(ocean_contours) > 0: + ocean_contours = ocean_contours.clip(fan_polygon).explode(ignore_index=True) + min_index = ocean_contours.distance(mouth_point).idxmin() + intersection_line = ocean_contours.loc[min_index].geometry + end_depth = ocean_contours.loc[min_index][self.ocean_contour_depth_label] + else: + self.logger.warning( + "No ocean contour intersected. 
Instead assuming the fan geometry"
+                )
+                intersection_line = shapely.geometry.LineString(
+                    [
+                        [fan_polygon.exterior.xy[0][2], fan_polygon.exterior.xy[1][2]],
+                        [fan_polygon.exterior.xy[0][3], fan_polygon.exterior.xy[1][3]],
+                    ]
+                )
+                end_depth = max(river_mouth_elevations) * 10
+
+            # Construct a fan ending at the contour
+            (x, y) = intersection_line.xy
+            polygon_points = [[xi, yi] for (xi, yi) in zip(x, y)]
+
+            # Check if the intersected contour and mouth normal are roughly parallel or
+            # anti-parallel
+            unit_vector_contour = numpy.array([x[-1] - x[0], y[-1] - y[0]])
+            unit_vector_contour = unit_vector_contour / numpy.linalg.norm(
+                unit_vector_contour
             )
+            unit_vector_mouth = numpy.array([mouth_normal.x, mouth_normal.y])
+
+            # They have the opposite direction
+            if (
+                numpy.arccos(numpy.dot(unit_vector_contour, unit_vector_mouth))
+                > numpy.pi / 2
+            ):
+                # keep line order
+                polygon_points.extend(
+                    [
+                        [
+                            mouth_point.x - mouth_normal.x * river_mouth_width / 2,
+                            mouth_point.y - mouth_normal.y * river_mouth_width / 2,
+                        ],
+                        [
+                            mouth_point.x + mouth_normal.x * river_mouth_width / 2,
+                            mouth_point.y + mouth_normal.y * river_mouth_width / 2,
+                        ],
+                    ]
+                )
+            else:  # They have the same direction, so reverse
+                # reverse fan order
+                polygon_points.extend(
+                    [
+                        [
+                            mouth_point.x + mouth_normal.x * river_mouth_width / 2,
+                            mouth_point.y + mouth_normal.y * river_mouth_width / 2,
+                        ],
+                        [
+                            mouth_point.x - mouth_normal.x * river_mouth_width / 2,
+                            mouth_point.y - mouth_normal.y * river_mouth_width / 2,
+                        ],
+                    ]
+                )
+            fan_polygon = shapely.geometry.Polygon(polygon_points)
         fan_polygon = geopandas.GeoDataFrame(geometry=[fan_polygon], crs=self.crs)
 
         # Get bathymetry values
diff --git a/src/geofabrics/processor.py b/src/geofabrics/processor.py
index 84bb73c3..b581240b 100644
--- a/src/geofabrics/processor.py
+++ b/src/geofabrics/processor.py
@@ -304,10 +304,17 @@ def get_instruction_general(self, key: str, subkey: str = None):
                 "ocean": None,
             },
             "ignore_clipping": False,
-            "ocean": {
-                "nearest_k_for_interpolation": 40,
-                "use_edge": False,
-                "is_depth": False,
+            "nearest_k_for_interpolation": {
+                "ocean": 40,
+                "lakes": 500,
+                "rivers": 100,
+            },
+            "use_edge": {
+                "ocean": False,
+                "lakes": False,
+            },
+            "is_depth": {
+                "ocean": False,
             },
             "filter_waterways_by_osm_ids": [],
             "compression": 1,
@@ -1178,7 +1185,7 @@ def add_hydrological_features(
                         key="z_labels", subkey="ocean"
                     ),
                     is_depth=self.get_instruction_general(
-                        key="ocean", subkey="is_depth"
+                        key="is_depth", subkey="ocean"
                    ),
                )
                # Interpolate
@@ -1186,10 +1193,10 @@
                    ocean_points=ocean_points,
                    cache_path=temp_folder,
                    use_edge=self.get_instruction_general(
-                        key="ocean", subkey="use_edge"
+                        key="use_edge", subkey="ocean"
                    ),
                    k_nearest_neighbours=self.get_instruction_general(
-                        key="ocean", subkey="nearest_k_for_interpolation"
+                        key="nearest_k_for_interpolation", subkey="ocean"
                    ),
                    buffer=self.get_instruction_general(key="lidar_buffer"),
                    method=self.get_instruction_general(
@@ -1316,6 +1323,12 @@
                    ),
                    cache_path=temp_folder,
                    label="lakes",
+                    include_edges=self.get_instruction_general(
+                        key="use_edge", subkey="lakes"
+                    ),
+                    k_nearest_neighbours=self.get_instruction_general(
+                        key="nearest_k_for_interpolation", subkey="lakes"
+                    ),
                )
                temp_file = temp_folder / f"dem_added_{index + 1}_lake.nc"
                self.logger.info(
@@ -1377,6 
+1390,9 @@ def add_hydrological_features( ), cache_path=temp_folder, label="rivers and fans", + k_nearest_neighbours=self.get_instruction_general( + key="nearest_k_for_interpolation", subkey="rivers" + ), ) temp_file = temp_folder / f"dem_added_{index + 1}_rivers.nc" self.logger.info( @@ -1768,12 +1784,13 @@ def load_roads_osm(self) -> bool: if roads_polygon_path.is_file(): roads_polygon = geopandas.read_file(roads_path) - if roads_polygon.area.sum(): + if roads_polygon.area.sum() == 0: message = ( "Warning zero area roads polygon provided. Will ignore. " f"Please check {roads_polygon_path} if unexpected." ) self.logger.warning(message) + return roads_polygon if "roughness" not in roads_polygon.columns: message = ( "No roughnesses defined in the road polygon file. This is " @@ -1945,7 +1962,7 @@ def run(self): ) # Note must be called after all others if it is to be complete # If roads save temp then add in the roads - if roads is not None: + if roads is not None and roads.area.sum() > 0: # Cache roughness before adding roads temp_file = temp_folder / "zo_clipped.nc" self.logger.info(f"Save clipped geofabric to netCDF: {temp_file}") @@ -2103,11 +2120,16 @@ def estimate_river_mouth_fan(self, defaults: dict): elevations_clean.to_file(river_bathymetry_file) # Create fan object + if self.check_vector_or_raster(key="ocean_points", api_type="vector"): + ocean_points_file = self.get_instruction_path("ocean_points") + else: + ocean_points_file = None fan = geometry.RiverMouthFan( aligned_channel_file=river_centreline_file, river_bathymetry_file=river_bathymetry_file, river_polygon_file=river_polygon_file, ocean_contour_file=ocean_contour_file, + ocean_points_file=ocean_points_file, crs=crs, cross_section_spacing=cross_section_spacing, elevation_labels=["z"], @@ -3138,11 +3160,16 @@ def estimate_river_mouth_fan(self): ) # Create fan object + if self.check_vector_or_raster(key="ocean_points", api_type="vector"): + ocean_points_file = self.get_instruction_path("ocean_points") + else: + ocean_points_file = None fan = geometry.RiverMouthFan( aligned_channel_file=aligned_channel_file, river_bathymetry_file=river_bathymetry_file, river_polygon_file=river_polygon_file, ocean_contour_file=ocean_contour_file, + ocean_points_file=ocean_points_file, crs=crs, cross_section_spacing=cross_section_spacing, elevation_labels=[ @@ -3587,6 +3614,8 @@ def load_waterways(self) -> bool: if waterways_path.is_file(): waterways = geopandas.read_file(waterways_path) + if source == "osm": + waterways = waterways.set_index("OSM_id", drop=True) if "width" not in waterways.columns and source == "osm": message = ( "For an 'osm' source, the waterways file is generated by " @@ -3735,7 +3764,6 @@ def run(self): elevations.to_file(self.get_result_file_path(key="open_elevation")) elevations.to_file(self.get_result_file_path(key="closed_elevation")) return - # Create a DEM where the waterways and tunnels are self.create_dem(waterways=waterways) diff --git a/src/geofabrics/version.py b/src/geofabrics/version.py index 9a1caf91..7f63c850 100644 --- a/src/geofabrics/version.py +++ b/src/geofabrics/version.py @@ -3,4 +3,4 @@ Contains the package version information """ -__version__ = "1.1.24" +__version__ = "1.1.25" diff --git a/tests/test_add_patches_ngaruroro/instruction.json b/tests/test_add_patches_ngaruroro/instruction.json index b940053c..d2d0915f 100644 --- a/tests/test_add_patches_ngaruroro/instruction.json +++ b/tests/test_add_patches_ngaruroro/instruction.json @@ -5,7 +5,7 @@ "vertical": 7839 }, "grid_params": { - "resolution": 10 
+            "resolution": 8
         }
     },
     "processing": {
diff --git a/tests/test_many_stages_waikanae/data/benchmark.nc b/tests/test_many_stages_waikanae/data/benchmark.nc
index b91a5d04..f6f907bf 100644
--- a/tests/test_many_stages_waikanae/data/benchmark.nc
+++ b/tests/test_many_stages_waikanae/data/benchmark.nc
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:65f580ba43b43b9712dfbe2747c3ab04ef8eb7d9c1a3251bd4ad4d38b93424d7
-size 111104
+oid sha256:d76ccc0adf2e483b286462cab80caecf1353d91203e99d1ee42e62ec2b5337ee
+size 110663
diff --git a/tests/test_many_stages_waikanae/instruction.json b/tests/test_many_stages_waikanae/instruction.json
index b5d7cc75..26b9b160 100644
--- a/tests/test_many_stages_waikanae/instruction.json
+++ b/tests/test_many_stages_waikanae/instruction.json
@@ -166,7 +166,7 @@
             "lidar_classifications_to_keep": [2, 9],
             "interpolation": {"no_data": "linear", "ocean": "linear"},
             "filter_waterways_by_osm_ids": [200394974],
-            "ocean": {"use_edge": true, "is_depth": true, "nearest_k_for_interpolation": 50}
+            "use_edge": {"ocean": true}, "is_depth": {"ocean": true}, "nearest_k_for_interpolation": {"ocean": 50}
         }
     },
     "roughness":
diff --git a/tests/test_many_stages_westport/data/benchmark.nc b/tests/test_many_stages_westport/data/benchmark.nc
index 8b140951..d1db625d 100644
--- a/tests/test_many_stages_westport/data/benchmark.nc
+++ b/tests/test_many_stages_westport/data/benchmark.nc
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:af8c7475da8979e9ae280b53a1c55037cd01c9436e9d08022ce77fd9f65937bf
-size 111816
+oid sha256:409a4c62b1021ec25a270a1ca8982dd1f9ee73f51d3fd730231f6f04afc0da22
+size 111852
diff --git a/tests/test_many_stages_westport/test_case.py b/tests/test_many_stages_westport/test_case.py
index 8e649dcd..03b73fe4 100644
--- a/tests/test_many_stages_westport/test_case.py
+++ b/tests/test_many_stages_westport/test_case.py
@@ -58,7 +58,6 @@ def setUpClass(cls):
     @pytest.mark.skipif(sys.platform != "win32", reason="Windows test - this is strict")
     def test_result_dem_windows(self):
         """A basic comparison between the generated and benchmark DEM"""
-        decimal = 3
         file_path = (
             self.cache_dir / self.instructions["dem"]["data_paths"]["benchmark_dem"]
         )
@@ -76,12 +75,22 @@
             - benchmark_dem.z.data[~numpy.isnan(benchmark_dem.z.data)]
         )
         logging.info(f"DEM array diff is: {diff_array[diff_array != 0]}")
-        numpy.testing.assert_array_almost_equal(
-            test_dem.z.data[~numpy.isnan(test_dem.z.data)],
-            benchmark_dem.z.data[~numpy.isnan(benchmark_dem.z.data)],
-            decimal=decimal,
-            err_msg="The generated result_dem has different data from the "
-            "benchmark_dem",
+
+        threshold = 10e-2
+        allowable_number_above = 10
+        self.assertTrue(
+            len(diff_array[numpy.abs(diff_array) > threshold])
+            <= allowable_number_above,
+            "more than "
+            f"{allowable_number_above} DEM values differ by more than {threshold} on"
+            f" Windows test run: {diff_array[numpy.abs(diff_array) > threshold]}",
+        )
+        threshold = 10e-4
+        self.assertTrue(
+            len(diff_array[numpy.abs(diff_array) > threshold]) < len(diff_array) / 100,
+            f"{len(diff_array[numpy.abs(diff_array) > threshold])} or more than 1% of "
+            f"DEM values differ by more than {threshold} on Windows test run: "
+            f"{diff_array[numpy.abs(diff_array) > threshold]}",
         )
 
         # explicitly free memory as xarray seems to be hanging onto memory
@@ -121,7 +130,7 @@ def test_result_dem_linux(self):
             f"{allowable_number_above} DEM values differ by more than {threshold} on"
             f" Linux test run: {diff_array[numpy.abs(diff_array) > 
threshold]}", ) - threshold = 10e-6 + threshold = 10e-4 self.assertTrue( len(diff_array[numpy.abs(diff_array) > threshold]) < len(diff_array) / 100, f"{len(diff_array[numpy.abs(diff_array) > threshold])} or more than 1% of " diff --git a/tests/test_stopbanks_waikanae/data/benchmark.nc b/tests/test_stopbanks_waikanae/data/benchmark.nc index 62f6acee..999c35a2 100644 --- a/tests/test_stopbanks_waikanae/data/benchmark.nc +++ b/tests/test_stopbanks_waikanae/data/benchmark.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:802ba5c1f6b6fef96a2ce4d34d2dd2d386f5cf942b45f14834d4a8a2e0a15ae2 -size 38699 +oid sha256:30ffe59dab92739d6ed23259062b78ad4bfcb155a36a43e6dbac1ed306f44d54 +size 38523