From 12c89327c962d240f2f7972c40a1d490f762fbb3 Mon Sep 17 00:00:00 2001
From: Rose Pearson <rose.pearson@niwa.co.nz>
Date: Wed, 15 Jan 2025 11:19:42 +1300
Subject: [PATCH] various bug-fixes (#270)

* Fixed lake behaviour

* fixup: Format Python code with Black

* Optimisation to only use the KD tree if in the ROI

* Minor change to benchmark and updated instruction file for change to schema

* Ensure consistent behaviour between the first and second runs of waterways

* Fix to ensure tiles are filled with NaN (the no-data value) instead of random empty values - ensures consistent behaviour

* Add support for an ocean_points file - if provided, make the offshore fan to the offshore points in preference to contours

* Updated tests and committed optimisation to clipping

* Fixed traceback when no roads in a roughness domain

* Only reindex a patch if all x and y are also in the destination DEM and it is of the same resolution. Other minor code tidy-ups and corrections

* Updated version

* Updated code to ensure the expected DEM resolution is matched and updated tests

---------

Co-authored-by: github-actions
---
 pyproject.toml                                |   2 +-
 src/geofabrics/dem.py                         | 328 ++++++++++++------
 src/geofabrics/geometry.py                    | 201 +++++++----
 src/geofabrics/processor.py                   |  48 ++-
 src/geofabrics/version.py                     |   2 +-
 .../instruction.json                          |   2 +-
 .../data/benchmark.nc                         |   4 +-
 .../instruction.json                          |   2 +-
 .../data/benchmark.nc                         |   4 +-
 tests/test_many_stages_westport/test_case.py  |  25 +-
 .../test_stopbanks_waikanae/data/benchmark.nc |   4 +-
 11 files changed, 425 insertions(+), 197 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index c7440234..5920609e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "geofabrics"
-version = "1.1.24"
+version = "1.1.25"
 description = "A package for creating geofabrics for flood modelling."
 readme = "README.md"
 authors = [{ name = "Rose pearson", email = "rose.pearson@niwa.co.nz" }]
diff --git a/src/geofabrics/dem.py b/src/geofabrics/dem.py
index b3504ba1..f4a4b13f 100644
--- a/src/geofabrics/dem.py
+++ b/src/geofabrics/dem.py
@@ -315,6 +315,14 @@ def __init__(self, catchment_geometry: geometry.CatchmentGeometry, chunk_size: i
     def dem(self) -> xarray.Dataset:
         """Return the DEM over the catchment region"""
         raise NotImplementedError("dem must be instantiated in the child class")
+
+    def _check_resolution(self, dem: xarray.Dataset):
+        """Check the DEM resolution matches the specified resolution."""
+        dem_resolution = dem.rio.resolution()
+        dem_resolution = max(abs(dem_resolution[0]), abs(dem_resolution[1]))
+
+        return self.catchment_geometry.resolution == dem_resolution
+
 
     def _load_dem(self, filename: pathlib.Path) -> xarray.Dataset:
         """Load in and replace the DEM with a previously cached version."""
@@ -337,6 +345,9 @@ def _load_dem(self, filename: pathlib.Path) -> xarray.Dataset:
 
         if "zo" in dem.keys():
             dem["zo"] = dem.zo.astype(geometry.RASTER_TYPE)
+        if not self._check_resolution(dem):
+            raise ValueError("The specified resolution does not match the "
+                             f"{filename} resolution.")
         return dem
 
     def save_dem(
@@ -511,7 +522,7 @@ def _extents_from_mask(self, mask: numpy.ndarray, transform: dict):
 
         return dense_extents
 
-    def _chunks_from_dem(self, chunk_size, dem: xarray.Dataset) -> (list, list):
+    def _chunks_from_dem(self, chunk_size, dem: xarray.Dataset) -> tuple[list, list]:
         """Define the chunks to break the catchment into when reading in and
         downsampling LiDAR.
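The `_check_resolution` guard added above treats a DEM as matching when the larger of the absolute x/y pixel sizes reported by rioxarray equals the configured catchment resolution. A minimal standalone sketch of that comparison, assuming a rioxarray-backed dataset (the `resolution_matches` name and `expected` parameter are illustrative, not geofabrics API):

    import rioxarray  # noqa: F401 - registers the .rio accessor on xarray objects
    import xarray


    def resolution_matches(dem: xarray.Dataset, expected: float) -> bool:
        # rio.resolution() returns (x_res, y_res); y_res is usually negative
        # for north-up rasters, so compare the largest absolute pixel size.
        res_x, res_y = dem.rio.resolution()
        return expected == max(abs(res_x), abs(res_y))

As in the patch, the comparison is exact equality, so callers are expected to fail fast with a ValueError when a cached DEM or patch raster was generated at a different resolution.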
@@ -631,7 +642,10 @@ def __init__(
         self._write_netcdf_conventions_in_place(raw_dem, catchment_geometry.crs)
 
         # Clip to catchment and set the data_source layer to NaN where there is no data
-        raw_dem = raw_dem.rio.clip(catchment_geometry.catchment.geometry, drop=True)
+        raw_dem = raw_dem.rio.clip_box(*tuple(catchment_geometry.catchment.total_bounds))
+        raw_dem = raw_dem.where(
+            clip_mask(raw_dem.z, catchment_geometry.catchment.geometry, self.chunk_size)
+        )
         raw_dem["data_source"] = raw_dem.data_source.where(
             raw_dem.data_source != self.SOURCE_CLASSIFICATION["no data"],
             numpy.nan,
@@ -639,6 +653,10 @@
         # Rerun as otherwise the no data as NaN seems to be lost for the data_source layer
         self._write_netcdf_conventions_in_place(raw_dem, catchment_geometry.crs)
 
+        if not self._check_resolution(raw_dem):
+            raise ValueError("The specified resolution does not match the "
+                             f"{raw_dem_path} resolution.")
+
         # Set attributes
         self._raw_dem = raw_dem
         self.interpolation_method = interpolation_method
@@ -884,6 +902,23 @@ def interpolate_ocean_chunked(
             delayed_chunked_x = []
             for j, dim_x in enumerate(chunked_dim_x):
                 self.logger.debug(f"\tLiDAR chunk {[i, j]}")
+                # Check ROI to tile
+                chunk_region_to_tile = self._define_chunk_region(
+                    region_to_rasterise=region_to_rasterise,
+                    dim_x=dim_x,
+                    dim_y=dim_y,
+                    radius=0,
+                )
+                if chunk_region_to_tile.area.sum() == 0:
+                    self.logger.debug(f"\t\tReturning empty tile as out of ROI")
+                    delayed_chunked_x.append(
+                        dask.array.full(
+                            shape=(len(dim_y), len(dim_x)),
+                            fill_value=numpy.nan,
+                            dtype=raster_options["raster_type"],
+                        )
+                    )
+                    continue
                 # Load in points
                 chunk_offshore_points = delayed_load_tiles_in_chunk(
                     lidar_files=[offshore_file],
@@ -1141,16 +1176,12 @@ def add_points_within_polygon_chunked(
 
         if include_edges:
             # Get edge points - from DEM
-            edge_dem = self._dem.rio.clip(
-                region_to_rasterise.dissolve().buffer(
-                    self.catchment_geometry.resolution
-                ),
-                drop=True,
-            )
+            edge_roi = region_to_rasterise.dissolve().buffer(self.catchment_geometry.resolution)
+            edge_dem = self._dem.rio.clip_box(*tuple(edge_roi.total_bounds))
+            edge_dem = edge_dem.rio.clip(edge_roi)
             edge_dem = edge_dem.rio.clip(
                 region_to_rasterise.dissolve().geometry,
                 invert=True,
-                drop=True,
             )
             # Define the edge points
             grid_x, grid_y = numpy.meshgrid(edge_dem.x, edge_dem.y)
@@ -1203,6 +1234,24 @@
             delayed_chunked_x = []
             for j, dim_x in enumerate(chunked_dim_x):
                 self.logger.debug(f"\tLiDAR chunk {[i, j]}")
+                # Check ROI to tile
+                chunk_region_to_tile = self._define_chunk_region(
+                    region_to_rasterise=region_to_rasterise,
+                    dim_x=dim_x,
+                    dim_y=dim_y,
+                    radius=0,
+                )
+                if chunk_region_to_tile.area.sum() == 0:
+                    self.logger.debug(f"\t\tReturning empty tile as out of ROI")
+                    delayed_chunked_x.append(
+                        dask.array.full(
+                            shape=(len(dim_y), len(dim_x)),
+                            fill_value=numpy.nan,
+                            dtype=raster_options["raster_type"],
+                        )
+                    )
+                    continue
+
                 # Load in points
                 river_points = delayed_load_tiles_in_chunk(
                     lidar_files=[lidar_file],
@@ -1253,7 +1302,8 @@ def add_points_within_polygon_nearest_chunked(
         method: str,
         cache_path: pathlib.Path,
         label: str,
-        k_nearest_neighbours: int = 100,
+        k_nearest_neighbours: int,
+        include_edges: bool = True,
     ) -> xarray.Dataset:
         """Performs interpolation from estimated bathymetry points within a polygon
         using the specified interpolation approach after filtering the points based
@@ -1267,7 +1317,7 @@
             "method": method,
             "crs": crs,
             "k_nearest_neighbours": k_nearest_neighbours,
-            "use_edge": True,
"use_edge": True, + "use_edge": include_edges, "strict": False, } if method == "rbf": @@ -1289,93 +1339,98 @@ def add_points_within_polygon_nearest_chunked( pdal_pipeline = pdal.Pipeline(json.dumps(pdal_pipeline_instructions), [points]) pdal_pipeline.execute() - # Tempoarily save the adjacent points from the DEM - edge_dem = self._dem.rio.clip( - region_to_rasterise.dissolve().buffer(self.catchment_geometry.resolution), - drop=True, - ) - self._write_netcdf_conventions_in_place(edge_dem, self.catchment_geometry.crs) - edge_dem["z"] = edge_dem.z.rio.interpolate_na(method="nearest") - edge_dem = self._dem.rio.clip( - region_to_rasterise.dissolve().buffer(self.catchment_geometry.resolution), - drop=True, - ) - edge_dem = edge_dem.rio.clip( - region_to_rasterise.dissolve().geometry, - invert=True, - drop=True, - ) - - # Save provided points - grid_x, grid_y = numpy.meshgrid(edge_dem.x, edge_dem.y) - flat_x = grid_x.flatten() - flat_y = grid_y.flatten() - flat_z = edge_dem.z.values.flatten() - mask_z = ~numpy.isnan(flat_z) - - # Interpolate the estimated bank heights around the polygon if they exist - if elevations.bank_heights_exist(): - # Get the estimated bank heights and define a mask where nan - bank_points = elevations.bank_height_points() - bank_nan_mask = numpy.logical_not(numpy.isnan(bank_points["Z"])) - # Interpolate from the estimated bank heights - xy_out = numpy.concatenate( - [[flat_x[mask_z]], [flat_y[mask_z]]], axis=0 - ).transpose() - options = { - "radius": elevations.points["width"].max(), - "raster_type": geometry.RASTER_TYPE, - "method": "linear", - "strict": False, - } - estimated_edge_z = elevation_from_points( - point_cloud=bank_points[bank_nan_mask], - xy_out=xy_out, - options=options, + # Tempoarily save the adjacent points from the DEM - ensure no NaN through NN interpolation + if include_edges: + edge_roi = region_to_rasterise.dissolve().buffer( + self.catchment_geometry.resolution + ) + edge_dem = self._dem.rio.clip_box(*tuple(edge_roi.total_bounds)) + #edge_dem = edge_dem.rio.clip(edge_roi) + self._write_netcdf_conventions_in_place( + edge_dem, self.catchment_geometry.crs + ) + edge_dem["z"] = edge_dem.z.rio.interpolate_na(method="nearest") + edge_dem = edge_dem.rio.clip(edge_roi, drop=True, ) + edge_dem = edge_dem.rio.clip( + region_to_rasterise.dissolve().geometry, + invert=True, + drop=True, ) - # Use the estimated bank heights where lower than the DEM edge values - mask_z_edge = mask_z.copy() - mask_z_edge[:] = False - mask_z_edge[mask_z] = flat_z[mask_z] > estimated_edge_z - flat_z[mask_z_edge] = estimated_edge_z[flat_z[mask_z] > estimated_edge_z] + # Save provided points + grid_x, grid_y = numpy.meshgrid(edge_dem.x, edge_dem.y) + flat_x = grid_x.flatten() + flat_y = grid_y.flatten() + flat_z = edge_dem.z.values.flatten() + mask_z = ~numpy.isnan(flat_z) + + # Interpolate the estimated bank heights around the polygon if they exist + if elevations.bank_heights_exist(): + # Get the estimated bank heights and define a mask where nan + bank_points = elevations.bank_height_points() + bank_nan_mask = numpy.logical_not(numpy.isnan(bank_points["Z"])) + # Interpolate from the estimated bank heights + xy_out = numpy.concatenate( + [[flat_x[mask_z]], [flat_y[mask_z]]], axis=0 + ).transpose() + options = { + "radius": elevations.points["width"].max(), + "raster_type": geometry.RASTER_TYPE, + "method": "linear", + "strict": False, + } + estimated_edge_z = elevation_from_points( + point_cloud=bank_points[bank_nan_mask], + xy_out=xy_out, + options=options, + ) - # Use the 
flat_x/y/z to define edge points and heights - edge_points = numpy.empty( - [mask_z.sum().sum()], - dtype=[ - ("X", geometry.RASTER_TYPE), - ("Y", geometry.RASTER_TYPE), - ("Z", geometry.RASTER_TYPE), - ], - ) - edge_points["X"] = flat_x[mask_z] - edge_points["Y"] = flat_y[mask_z] - edge_points["Z"] = flat_z[mask_z] + # Use the estimated bank heights where lower than the DEM edge values + mask_z_edge = mask_z.copy() + mask_z_edge[:] = False + mask_z_edge[mask_z] = flat_z[mask_z] > estimated_edge_z + flat_z[mask_z_edge] = estimated_edge_z[ + flat_z[mask_z] > estimated_edge_z + ] - edge_file = cache_path / f"{label}_edge_points.laz" - pdal_pipeline_instructions = [ - { - "type": "writers.las", - "a_srs": f"EPSG:" f"{crs['horizontal']}+" f"{crs['vertical']}", - "filename": str(edge_file), - "compression": "laszip", - } - ] - pdal_pipeline = pdal.Pipeline( - json.dumps(pdal_pipeline_instructions), [edge_points] - ) - pdal_pipeline.execute() + # Use the flat_x/y/z to define edge points and heights + edge_points = numpy.empty( + [mask_z.sum().sum()], + dtype=[ + ("X", geometry.RASTER_TYPE), + ("Y", geometry.RASTER_TYPE), + ("Z", geometry.RASTER_TYPE), + ], + ) + edge_points["X"] = flat_x[mask_z] + edge_points["Y"] = flat_y[mask_z] + edge_points["Z"] = flat_z[mask_z] - if ( - len(points) < raster_options["k_nearest_neighbours"] - or len(edge_points) < raster_options["k_nearest_neighbours"] + edge_file = cache_path / f"{label}_edge_points.laz" + pdal_pipeline_instructions = [ + { + "type": "writers.las", + "a_srs": f"EPSG:" f"{crs['horizontal']}+" f"{crs['vertical']}", + "filename": str(edge_file), + "compression": "laszip", + } + ] + pdal_pipeline = pdal.Pipeline( + json.dumps(pdal_pipeline_instructions), [edge_points] + ) + pdal_pipeline.execute() + + if len(points) < raster_options["k_nearest_neighbours"] or ( + include_edges and len(edge_points) < raster_options["k_nearest_neighbours"] ): + k_nearest_neighbours = ( + min(len(points), len(edge_points)) if include_edges else len(points) + ) logging.info( f"Fewer points or edge points than the default expected {raster_options['k_nearest_neighbours']}. " - f"Updating k_nearest_neighbours to {min(len(points), len(edge_points))}." + f"Updating k_nearest_neighbours to {k_nearest_neighbours}." ) - raster_options["k_nearest_neighbours"] = min(len(points), len(edge_points)) + raster_options["k_nearest_neighbours"] = k_nearest_neighbours if raster_options["k_nearest_neighbours"] < 3: logging.warning( f"Not enough points or edge points to meaningfully include {raster_options['k_nearest_neighbours']}. 
" @@ -1402,6 +1457,24 @@ def add_points_within_polygon_nearest_chunked( delayed_chunked_x = [] for j, dim_x in enumerate(chunked_dim_x): self.logger.debug(f"\tLiDAR chunk {[i, j]}") + # Check ROI to tile + chunk_region_to_tile = self._define_chunk_region( + region_to_rasterise=region_to_rasterise, + dim_x=dim_x, + dim_y=dim_y, + radius=0, + ) + if chunk_region_to_tile.area.sum() == 0: + self.logger.debug(f"\t\tReturning empty tile as out of RIO") + delayed_chunked_x.append( + dask.array.full( + shape=(len(dim_y), len(dim_x)), + fill_value=numpy.nan, + dtype=raster_options["raster_type"], + ) + ) + continue + # Load in points points = delayed_load_tiles_in_chunk( lidar_files=[points_file], @@ -1409,13 +1482,15 @@ def add_points_within_polygon_nearest_chunked( chunk_region_to_tile=None, crs=raster_options["crs"], ) - - edge_points = delayed_load_tiles_in_chunk( - lidar_files=[edge_file], - source_crs=raster_options["crs"], - chunk_region_to_tile=None, - crs=raster_options["crs"], - ) + if include_edges: + edge_points = delayed_load_tiles_in_chunk( + lidar_files=[edge_file], + source_crs=raster_options["crs"], + chunk_region_to_tile=None, + crs=raster_options["crs"], + ) + else: + edge_points = None # Rasterise tiles delayed_chunked_x.append( @@ -1704,7 +1779,7 @@ def __init__( self.buffer_cells = buffer_cells self._dem = None - def _set_up_chunks(self) -> (list, list): + def _set_up_chunks(self) -> tuple[list, list]: """Define the chunks to break the catchment into when reading in and downsampling LiDAR. """ @@ -1856,7 +1931,7 @@ def clip_lidar( # Clip DEM to Catchment and ensure NaN outside region to rasterise catchment = self.catchment_geometry.catchment - self._dem = self._dem.rio.clip_box(**catchment.bounds.iloc[0]) + self._dem = self._dem.rio.clip_box(*tuple(catchment.total_bounds)) self._dem = self._dem.where( clip_mask(self._dem.z, catchment.geometry, self.chunk_size) ) @@ -1969,6 +2044,22 @@ def _add_tiled_lidar_chunked( chunk_region_to_tile=chunk_region_to_tile, lidar_files_map=lidar_files_map, ) + + # Return empty if no files + if len(chunk_lidar_files) == 0: + self.logger.debug( + f"\t\tReturning empty tile as no LiDAR or out of ROI" + ) + delayed_chunked_x.append( + dask.array.full( + shape=(len(dim_y), len(dim_x)), + fill_value=numpy.nan, + dtype=raster_options["raster_type"], + ) + ) + continue + + # Get point cloud chunk_points = delayed_load_tiles_in_chunk( lidar_files=chunk_lidar_files, source_crs=source_crs, @@ -2381,6 +2472,9 @@ def __init__( "band", drop=True ) # remove band coordinate added by rasterio.open() self._write_netcdf_conventions_in_place(initial_dem, catchment_geometry.crs) + if not self._check_resolution(initial_dem): + raise ValueError("The specified resolution does not match the " + f"{initial_dem_path} resolution.") self._dem = initial_dem def add_patch(self, patch_path: pathlib.Path, label: str, layer: str): @@ -2410,19 +2504,21 @@ def add_patch(self, patch_path: pathlib.Path, label: str, layer: str): patch = rioxarray.rioxarray.open_rasterio( patch_path, masked=True, + chunks=True, ).squeeze("band", drop=True) patch.rio.write_crs(self.catchment_geometry.crs["horizontal"], inplace=True) patch_resolution = patch.rio.resolution() patch_resolution = max(abs(patch_resolution[0]), abs(patch_resolution[1])) + # Define region to patch within if self.drop_patch_offshore: roi = self.catchment_geometry.land_and_foreshore else: roi = self.catchment_geometry.catchment try: # Use try catch as otherwise crash if patch does not overlap roi - patch = patch.rio.clip( - 
roi.buffer(patch_resolution).geometry, - drop=True, + patch = patch.rio.clip_box(*tuple(roi.buffer(patch_resolution).total_bounds)) + patch = patch.where( + clip_mask(patch, roi.buffer(patch_resolution).geometry, self.chunk_size) ) except ( # If exception skip and proceed to the next patch rioxarray.exceptions.NoDataInBounds, @@ -2450,14 +2546,14 @@ def add_patch(self, patch_path: pathlib.Path, label: str, layer: str): self.logger.info(f"\t\tAdd data from coarse DEM: {patch_path.name}") # Check if same resolution - if all(patch.x.isin(self._dem.x)) and all(patch.y.isin(self._dem.y)): - # Do a stright replacement if grid aligned + if all(patch.x.isin(self._dem.x)) and all(patch.y.isin(self._dem.y)) and patch_resolution == self.catchment_geometry.resolution: + self.logger.info(f"\t\t\tGrid aligned so do a straight reindex") patch = patch.reindex_like(self._dem, fill_value=numpy.nan) elif ( self.chunk_size is not None and max(len(self._dem.x), len(self._dem.y)) > self.chunk_size - ): # Ensure parallelisation if chunks specified - # Expect xarray dims (y, x), not (x, y) as default for rioxarray + ): # Expect xarray dims (y, x), not (x, y) as default for rioxarray + self.logger.info(f"\t\t\tInterpolate with dask parallelisation at chunk size") interpolator = scipy.interpolate.RegularGridInterpolator( (patch.y.values, patch.x.values), patch.values, @@ -2483,6 +2579,7 @@ def dask_interpolation(y, x): ) patch.rio.write_transform(inplace=True) else: # No chunking use built in method + self.logger.info(f"\t\t\tInterpolate using built-in method") patch = patch.interp(x=self._dem.x, y=self._dem.y, method="linear") patch.rio.write_crs(self.catchment_geometry.crs["horizontal"], inplace=True) patch.rio.write_nodata(numpy.nan, encoded=True, inplace=True) @@ -2625,7 +2722,9 @@ def __init__( self._write_netcdf_conventions_in_place( hydrological_dem, catchment_geometry.crs ) - + if not self._check_resolution(hydrological_dem): + raise ValueError("The specified resolution does not match the " + f"{hydrological_dem_path} resolution.") # Ensure the resolution of the hydrological DEM matches the input DEM assert ( abs(float(hydrological_dem.x[1] - hydrological_dem.x[0])) @@ -2637,7 +2736,7 @@ def __init__( # Clip to the catchment extents to ensure performance catchment = self.catchment_geometry.catchment - hydrological_dem = hydrological_dem.rio.clip_box(**catchment.bounds.iloc[0]) + hydrological_dem = hydrological_dem.rio.clip_box(*tuple(catchment.total_bounds)) mask = clip_mask(hydrological_dem.z, catchment.geometry, self.chunk_size) hydrological_dem = hydrological_dem.where(mask) # Rerun as otherwise the no data as NaN seems to be lost for the data_source layer @@ -2861,6 +2960,21 @@ def _add_tiled_lidar_chunked( chunk_region_to_tile=chunk_region_to_tile, lidar_files_map=lidar_files_map, ) + + # Return empty if no files + if len(chunk_lidar_files) == 0: + self.logger.debug( + f"\t\tReturning empty tile as no LiDAR or out of ROI" + ) + delayed_chunked_x.append( + dask.array.full( + shape=(len(dim_y), len(dim_x)), + fill_value=numpy.nan, + dtype=raster_options["raster_type"], + ) + ) + continue + chunk_points = delayed_load_tiles_in_chunk( lidar_files=chunk_lidar_files, source_crs=source_crs, diff --git a/src/geofabrics/geometry.py b/src/geofabrics/geometry.py index 005f870f..1de8e7fb 100644 --- a/src/geofabrics/geometry.py +++ b/src/geofabrics/geometry.py @@ -871,6 +871,7 @@ def __init__( river_bathymetry_file: str, river_polygon_file: str, ocean_contour_file: str, + ocean_points_file: str, crs: int, 
cross_section_spacing: float,
         elevation_labels: list,
@@ -885,6 +886,7 @@
         self.ocean_contour_file = ocean_contour_file
         self.ocean_contour_depth_label = ocean_contour_depth_label
         self.elevation_labels = elevation_labels
+        self.ocean_points_file = ocean_points_file
 
     def _get_mouth_alignment(self):
         """Get the location and alignment of the river mouth."""
@@ -966,6 +968,41 @@
             ), f"No contours exist with a depth {depth_multiplier}x the river mouth depth. "
 
         return ocean_contours
+
+    def _get_distance_to_nearest_ocean_point_in_fan(
+        self,
+        river_mouth_z,
+        fan,
+        mouth_point,
+        depth_multiplier,
+    ) -> tuple[float, float]:
+        """Load in the ocean points and get the nearest one to the river mouth
+        that is deeper than the river.
+
+        Parameters:
+
+            river_mouth_z       The elevation in m of the river mouth
+            depth_multiplier    Number of times deeper the end point should be
+                                than the river mouth
+        """
+
+        # Load in the ocean points and find the points to terminate against
+        ocean_points = geopandas.read_file(self.ocean_points_file).to_crs(self.crs)
+
+        # Determine the end depth and filter the points to keep only those deep enough
+        ocean_points = ocean_points.clip(fan).reset_index(drop=True)
+        if river_mouth_z > 0:
+            river_mouth_z = 0
+        ocean_points = ocean_points[
+            ocean_points["Z"] < depth_multiplier * river_mouth_z
+        ].reset_index(drop=True)
+
+        if len(ocean_points) == 0:
+            raise ValueError(f"No points exist within the fan at a depth {depth_multiplier}x the river mouth depth.")
+
+        distance = ocean_points.distance(mouth_point).min()
+        elevation = ocean_points.iloc[ocean_points.distance(mouth_point).idxmin()]["Z"]
+
+        return distance, elevation
 
     def _bathymetry(
         self,
@@ -1030,12 +1067,13 @@
         fan_depths = geopandas.GeoDataFrame(fan_depths, crs=self.crs)
         return fan_depths
 
-    def _max_length_polygon(
+    def _fixed_length_fan_polygon(
         self,
         river_mouth_width: float,
         mouth_point: float,
         mouth_tangent: float,
         mouth_normal: float,
+        length: float,
     ):
         """Return the fan polygon of maximum length. 
This will be used to produce data to the first contour at least 2x the depth of the river @@ -1048,13 +1086,13 @@ def _max_length_polygon( mouth_normal The normal to the river mouth (cross channel axis) """ - end_width = river_mouth_width + 2 * self.FAN_MAX_LENGTH * numpy.tan( + end_width = river_mouth_width + 2 * length * numpy.tan( numpy.pi / 180 * self.FAN_ANGLE / 2 ) fan_end_point = shapely.geometry.Point( [ - mouth_point.x + self.FAN_MAX_LENGTH * mouth_tangent.x, - mouth_point.y + self.FAN_MAX_LENGTH * mouth_tangent.y, + mouth_point.x + length * mouth_tangent.x, + mouth_point.y + length * mouth_tangent.y, ] ) fan_polygon = shapely.geometry.Polygon( @@ -1092,11 +1130,12 @@ def polygon_and_bathymetry(self): ) = self._get_mouth_bathymetry() # Create maximum fan polygon - fan_polygon = self._max_length_polygon( + fan_polygon = self._fixed_length_fan_polygon( river_mouth_width=river_mouth_width, mouth_point=mouth_point, mouth_tangent=mouth_tangent, mouth_normal=mouth_normal, + length=self.FAN_MAX_LENGTH, ) # Define fan centre line @@ -1110,76 +1149,114 @@ def polygon_and_bathymetry(self): ] ) - # Load in ocean depth contours and keep only those intersecting the fan centre - ocean_contours = self._get_ocean_contours(max(river_mouth_elevations)) - end_depth = ocean_contours[self.ocean_contour_depth_label].min() - ocean_contours = ocean_contours.explode(ignore_index=True) - ocean_contours = ocean_contours[ - ~ocean_contours.intersection(fan_centre).is_empty - ] - - # Keep the closest contour intersecting - if len(ocean_contours) > 0: - ocean_contours = ocean_contours.clip(fan_polygon).explode(ignore_index=True) - min_index = ocean_contours.distance(mouth_point).idxmin() - intersection_line = ocean_contours.loc[min_index].geometry - end_depth = ocean_contours.loc[min_index][self.ocean_contour_depth_label] - else: - self.logger.warning( - "No ocean contour intersected. 
Instaed assumed fan geoemtry" + if self.ocean_points_file is not None: + length, end_depth = self._get_distance_to_nearest_ocean_point_in_fan( + river_mouth_z=max(river_mouth_elevations), + fan=fan_polygon, + mouth_point=mouth_point, + depth_multiplier=2 ) - intersection_line = shapely.geometry.linestring( + fan_polygon = self._fixed_length_fan_polygon( + river_mouth_width=river_mouth_width, + mouth_point=mouth_point, + mouth_tangent=mouth_tangent, + mouth_normal=mouth_normal, + length=length, + ) + end_width = river_mouth_width + 2 * length * numpy.tan( + numpy.pi / 180 * self.FAN_ANGLE / 2 + ) + fan_end_point = shapely.geometry.Point( [ - [fan_polygon.exterior.xy[0][2], fan_polygon.exterior.xy[1][2]], - [fan_polygon.exterior.xy[0][3], fan_polygon.exterior.xy[1][3]], + mouth_point.x + length * mouth_tangent.x, + mouth_point.y + length * mouth_tangent.y, ] ) - - # Construct a fan ending at the contour - (x, y) = intersection_line.xy - polygon_points = [[xi, yi] for (xi, yi) in zip(x, y)] - - # Check if the intersected contour and mouth normal are roughtly parallel or - # anti-parallel - unit_vector_contour = numpy.array([x[-1] - x[0], y[-1] - y[0]]) - unit_vector_contour = unit_vector_contour / numpy.linalg.norm( - unit_vector_contour - ) - unit_vector_mouth = numpy.array([mouth_normal.x, mouth_normal.y]) - - # they have the oposite direction - if ( - numpy.arccos(numpy.dot(unit_vector_contour, unit_vector_mouth)) - > numpy.pi / 2 - ): - # keep line order - polygon_points.extend( + intersection_line = shapely.geometry.LineString( [ [ - mouth_point.x - mouth_normal.x * river_mouth_width / 2, - mouth_point.y - mouth_normal.y * river_mouth_width / 2, + fan_end_point.x + mouth_normal.x * end_width / 2, + fan_end_point.y + mouth_normal.y * end_width / 2, ], [ - mouth_point.x + mouth_normal.x * river_mouth_width / 2, - mouth_point.y + mouth_normal.y * river_mouth_width / 2, + fan_end_point.x - mouth_normal.x * end_width / 2, + fan_end_point.y - mouth_normal.y * end_width / 2, ], ] ) - else: # The have the same direction, so reverse - # reverse fan order - polygon_points.extend( - [ - [ - mouth_point.x + mouth_normal.x * river_mouth_width / 2, - mouth_point.y + mouth_normal.y * river_mouth_width / 2, - ], + + else: + # Load in ocean depth contours and keep only those intersecting the fan centre + ocean_contours = self._get_ocean_contours(max(river_mouth_elevations)) + end_depth = ocean_contours[self.ocean_contour_depth_label].min() + ocean_contours = ocean_contours.explode(ignore_index=True) + ocean_contours = ocean_contours[ + ~ocean_contours.intersection(fan_centre).is_empty + ] + + # Keep the closest contour intersecting + if len(ocean_contours) > 0: + ocean_contours = ocean_contours.clip(fan_polygon).explode(ignore_index=True) + min_index = ocean_contours.distance(mouth_point).idxmin() + intersection_line = ocean_contours.loc[min_index].geometry + end_depth = ocean_contours.loc[min_index][self.ocean_contour_depth_label] + else: + self.logger.warning( + "No ocean contour intersected. 
Instead assuming the fan geometry"
+                )
+                intersection_line = shapely.geometry.LineString(
+                    [
+                        [fan_polygon.exterior.xy[0][2], fan_polygon.exterior.xy[1][2]],
+                        [fan_polygon.exterior.xy[0][3], fan_polygon.exterior.xy[1][3]],
+                    ]
+                )
+                end_depth = max(river_mouth_elevations) * 10
+
+            # Construct a fan ending at the contour
+            (x, y) = intersection_line.xy
+            polygon_points = [[xi, yi] for (xi, yi) in zip(x, y)]
+
+            # Check if the intersected contour and mouth normal are roughly parallel or
+            # anti-parallel
+            unit_vector_contour = numpy.array([x[-1] - x[0], y[-1] - y[0]])
+            unit_vector_contour = unit_vector_contour / numpy.linalg.norm(
+                unit_vector_contour
             )
+            unit_vector_mouth = numpy.array([mouth_normal.x, mouth_normal.y])
+
+            # They have the opposite direction
+            if (
+                numpy.arccos(numpy.dot(unit_vector_contour, unit_vector_mouth))
+                > numpy.pi / 2
+            ):
+                # keep line order
+                polygon_points.extend(
+                    [
+                        [
+                            mouth_point.x - mouth_normal.x * river_mouth_width / 2,
+                            mouth_point.y - mouth_normal.y * river_mouth_width / 2,
+                        ],
+                        [
+                            mouth_point.x + mouth_normal.x * river_mouth_width / 2,
+                            mouth_point.y + mouth_normal.y * river_mouth_width / 2,
+                        ],
+                    ]
+                )
+            else:  # They have the same direction, so reverse
+                # reverse fan order
+                polygon_points.extend(
+                    [
+                        [
+                            mouth_point.x + mouth_normal.x * river_mouth_width / 2,
+                            mouth_point.y + mouth_normal.y * river_mouth_width / 2,
+                        ],
+                        [
+                            mouth_point.x - mouth_normal.x * river_mouth_width / 2,
+                            mouth_point.y - mouth_normal.y * river_mouth_width / 2,
+                        ],
+                    ]
+                )
+            fan_polygon = shapely.geometry.Polygon(polygon_points)
         fan_polygon = geopandas.GeoDataFrame(geometry=[fan_polygon], crs=self.crs)
 
         # Get bathymetry values
diff --git a/src/geofabrics/processor.py b/src/geofabrics/processor.py
index 84bb73c3..b581240b 100644
--- a/src/geofabrics/processor.py
+++ b/src/geofabrics/processor.py
@@ -304,10 +304,17 @@ def get_instruction_general(self, key: str, subkey: str = None):
                 "ocean": None,
             },
             "ignore_clipping": False,
-            "ocean": {
-                "nearest_k_for_interpolation": 40,
-                "use_edge": False,
-                "is_depth": False,
+            "nearest_k_for_interpolation": {
+                "ocean": 40,
+                "lakes": 500,
+                "rivers": 100,
+            },
+            "use_edge": {
+                "ocean": False,
+                "lakes": False,
+            },
+            "is_depth": {
+                "ocean": False,
             },
             "filter_waterways_by_osm_ids": [],
             "compression": 1,
@@ -1178,7 +1185,7 @@ def add_hydrological_features(
                         key="z_labels", subkey="ocean"
                     ),
                     is_depth=self.get_instruction_general(
-                        key="ocean", subkey="is_depth"
+                        key="is_depth", subkey="ocean"
                    ),
                )
                # Interpolate
@@ -1186,10 +1193,10 @@
                    ocean_points=ocean_points,
                    cache_path=temp_folder,
                    use_edge=self.get_instruction_general(
-                        key="ocean", subkey="use_edge"
+                        key="use_edge", subkey="ocean"
                    ),
                    k_nearest_neighbours=self.get_instruction_general(
-                        key="ocean", subkey="nearest_k_for_interpolation"
+                        key="nearest_k_for_interpolation", subkey="ocean"
                    ),
                    buffer=self.get_instruction_general(key="lidar_buffer"),
                    method=self.get_instruction_general(
@@ -1316,6 +1323,12 @@
                    ),
                    cache_path=temp_folder,
                    label="lakes",
+                    include_edges=self.get_instruction_general(
+                        key="use_edge", subkey="lakes"
+                    ),
+                    k_nearest_neighbours=self.get_instruction_general(
+                        key="nearest_k_for_interpolation", subkey="lakes"
+                    ),
                )
                temp_file = temp_folder / f"dem_added_{index + 1}_lake.nc"
                self.logger.info(
@@ -1377,6 
+1390,9 @@ def add_hydrological_features( ), cache_path=temp_folder, label="rivers and fans", + k_nearest_neighbours=self.get_instruction_general( + key="nearest_k_for_interpolation", subkey="rivers" + ), ) temp_file = temp_folder / f"dem_added_{index + 1}_rivers.nc" self.logger.info( @@ -1768,12 +1784,13 @@ def load_roads_osm(self) -> bool: if roads_polygon_path.is_file(): roads_polygon = geopandas.read_file(roads_path) - if roads_polygon.area.sum(): + if roads_polygon.area.sum() == 0: message = ( "Warning zero area roads polygon provided. Will ignore. " f"Please check {roads_polygon_path} if unexpected." ) self.logger.warning(message) + return roads_polygon if "roughness" not in roads_polygon.columns: message = ( "No roughnesses defined in the road polygon file. This is " @@ -1945,7 +1962,7 @@ def run(self): ) # Note must be called after all others if it is to be complete # If roads save temp then add in the roads - if roads is not None: + if roads is not None and roads.area.sum() > 0: # Cache roughness before adding roads temp_file = temp_folder / "zo_clipped.nc" self.logger.info(f"Save clipped geofabric to netCDF: {temp_file}") @@ -2103,11 +2120,16 @@ def estimate_river_mouth_fan(self, defaults: dict): elevations_clean.to_file(river_bathymetry_file) # Create fan object + if self.check_vector_or_raster(key="ocean_points", api_type="vector"): + ocean_points_file = self.get_instruction_path("ocean_points") + else: + ocean_points_file = None fan = geometry.RiverMouthFan( aligned_channel_file=river_centreline_file, river_bathymetry_file=river_bathymetry_file, river_polygon_file=river_polygon_file, ocean_contour_file=ocean_contour_file, + ocean_points_file=ocean_points_file, crs=crs, cross_section_spacing=cross_section_spacing, elevation_labels=["z"], @@ -3138,11 +3160,16 @@ def estimate_river_mouth_fan(self): ) # Create fan object + if self.check_vector_or_raster(key="ocean_points", api_type="vector"): + ocean_points_file = self.get_instruction_path("ocean_points") + else: + ocean_points_file = None fan = geometry.RiverMouthFan( aligned_channel_file=aligned_channel_file, river_bathymetry_file=river_bathymetry_file, river_polygon_file=river_polygon_file, ocean_contour_file=ocean_contour_file, + ocean_points_file=ocean_points_file, crs=crs, cross_section_spacing=cross_section_spacing, elevation_labels=[ @@ -3587,6 +3614,8 @@ def load_waterways(self) -> bool: if waterways_path.is_file(): waterways = geopandas.read_file(waterways_path) + if source == "osm": + waterways = waterways.set_index("OSM_id", drop=True) if "width" not in waterways.columns and source == "osm": message = ( "For an 'osm' source, the waterways file is generated by " @@ -3735,7 +3764,6 @@ def run(self): elevations.to_file(self.get_result_file_path(key="open_elevation")) elevations.to_file(self.get_result_file_path(key="closed_elevation")) return - # Create a DEM where the waterways and tunnels are self.create_dem(waterways=waterways) diff --git a/src/geofabrics/version.py b/src/geofabrics/version.py index 9a1caf91..7f63c850 100644 --- a/src/geofabrics/version.py +++ b/src/geofabrics/version.py @@ -3,4 +3,4 @@ Contains the package version information """ -__version__ = "1.1.24" +__version__ = "1.1.25" diff --git a/tests/test_add_patches_ngaruroro/instruction.json b/tests/test_add_patches_ngaruroro/instruction.json index b940053c..d2d0915f 100644 --- a/tests/test_add_patches_ngaruroro/instruction.json +++ b/tests/test_add_patches_ngaruroro/instruction.json @@ -5,7 +5,7 @@ "vertical": 7839 }, "grid_params": { - "resolution": 10 
+            "resolution": 8
         }
     },
     "processing": {
diff --git a/tests/test_many_stages_waikanae/data/benchmark.nc b/tests/test_many_stages_waikanae/data/benchmark.nc
index b91a5d04..f6f907bf 100644
--- a/tests/test_many_stages_waikanae/data/benchmark.nc
+++ b/tests/test_many_stages_waikanae/data/benchmark.nc
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:65f580ba43b43b9712dfbe2747c3ab04ef8eb7d9c1a3251bd4ad4d38b93424d7
-size 111104
+oid sha256:d76ccc0adf2e483b286462cab80caecf1353d91203e99d1ee42e62ec2b5337ee
+size 110663
diff --git a/tests/test_many_stages_waikanae/instruction.json b/tests/test_many_stages_waikanae/instruction.json
index b5d7cc75..26b9b160 100644
--- a/tests/test_many_stages_waikanae/instruction.json
+++ b/tests/test_many_stages_waikanae/instruction.json
@@ -166,7 +166,7 @@
             "lidar_classifications_to_keep": [2, 9],
             "interpolation": {"no_data": "linear", "ocean": "linear"},
             "filter_waterways_by_osm_ids": [200394974],
-            "ocean": {"use_edge": true, "is_depth": true, "nearest_k_for_interpolation": 50}
+            "use_edge": {"ocean": true}, "is_depth": {"ocean": true}, "nearest_k_for_interpolation": {"ocean": 50}
         }
     },
     "roughness":
diff --git a/tests/test_many_stages_westport/data/benchmark.nc b/tests/test_many_stages_westport/data/benchmark.nc
index 8b140951..d1db625d 100644
--- a/tests/test_many_stages_westport/data/benchmark.nc
+++ b/tests/test_many_stages_westport/data/benchmark.nc
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:af8c7475da8979e9ae280b53a1c55037cd01c9436e9d08022ce77fd9f65937bf
-size 111816
+oid sha256:409a4c62b1021ec25a270a1ca8982dd1f9ee73f51d3fd730231f6f04afc0da22
+size 111852
diff --git a/tests/test_many_stages_westport/test_case.py b/tests/test_many_stages_westport/test_case.py
index 8e649dcd..03b73fe4 100644
--- a/tests/test_many_stages_westport/test_case.py
+++ b/tests/test_many_stages_westport/test_case.py
@@ -58,7 +58,6 @@ def setUpClass(cls):
     @pytest.mark.skipif(sys.platform != "win32", reason="Windows test - this is strict")
     def test_result_dem_windows(self):
         """A basic comparison between the generated and benchmark DEM"""
-        decimal = 3
         file_path = (
             self.cache_dir / self.instructions["dem"]["data_paths"]["benchmark_dem"]
         )
@@ -76,12 +75,22 @@
             - benchmark_dem.z.data[~numpy.isnan(benchmark_dem.z.data)]
         )
         logging.info(f"DEM array diff is: {diff_array[diff_array != 0]}")
-        numpy.testing.assert_array_almost_equal(
-            test_dem.z.data[~numpy.isnan(test_dem.z.data)],
-            benchmark_dem.z.data[~numpy.isnan(benchmark_dem.z.data)],
-            decimal=decimal,
-            err_msg="The generated result_dem has different data from the "
-            "benchmark_dem",
+
+        threshold = 10e-2
+        allowable_number_above = 10
+        self.assertTrue(
+            len(diff_array[numpy.abs(diff_array) > threshold])
+            <= allowable_number_above,
+            "more than "
+            f"{allowable_number_above} DEM values differ by more than {threshold} on"
+            f" Windows test run: {diff_array[numpy.abs(diff_array) > threshold]}",
+        )
+        threshold = 10e-4
+        self.assertTrue(
+            len(diff_array[numpy.abs(diff_array) > threshold]) < len(diff_array) / 100,
+            f"{len(diff_array[numpy.abs(diff_array) > threshold])} or more than 1% of "
+            f"DEM values differ by more than {threshold} on Windows test run: "
+            f"{diff_array[numpy.abs(diff_array) > threshold]}",
         )
 
         # explicitly free memory as xarray seems to be hanging onto memory
@@ -121,7 +130,7 @@ def test_result_dem_linux(self):
             f"{allowable_number_above} DEM values differ by more than {threshold} on"
             f" Linux test run: {diff_array[numpy.abs(diff_array) > 
threshold]}", ) - threshold = 10e-6 + threshold = 10e-4 self.assertTrue( len(diff_array[numpy.abs(diff_array) > threshold]) < len(diff_array) / 100, f"{len(diff_array[numpy.abs(diff_array) > threshold])} or more than 1% of " diff --git a/tests/test_stopbanks_waikanae/data/benchmark.nc b/tests/test_stopbanks_waikanae/data/benchmark.nc index 62f6acee..999c35a2 100644 --- a/tests/test_stopbanks_waikanae/data/benchmark.nc +++ b/tests/test_stopbanks_waikanae/data/benchmark.nc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:802ba5c1f6b6fef96a2ce4d34d2dd2d386f5cf942b45f14834d4a8a2e0a15ae2 -size 38699 +oid sha256:30ffe59dab92739d6ed23259062b78ad4bfcb155a36a43e6dbac1ed306f44d54 +size 38523