Add cleanup_layered_wel function #1472

Open · wants to merge 3 commits into master

Changes from all commits
2 changes: 2 additions & 0 deletions docs/api/changelog.rst
@@ -40,6 +40,8 @@ Added
- :meth:`imod.mf6.HorizontalFlowBarrierResistance.cleanup`,
:meth:`imod.mf6.SingleLayerHorizontalFlowBarrierResistance.cleanup`,
to clean up HFB geometries crossing inactive model cells.
- :func:`imod.prepare.cleanup.cleanup_wel_layered` to clean up wells assigned
  to layers.
Contributor Author commented:
I think you can also add an entry under the "Fixed" header, as this is very important for users!

Fixed
~~~~~
...
- Sorting issue in :func:`imod.prepare.assign_wells`. This could cause :class:`imod.mf6.Well` to assign wells to the wrong cells.

If we also decide to include the fixes made to the well clipping (see my other comment), those should also be added to the changelog.


Changed
~~~~~~~
57 changes: 55 additions & 2 deletions imod/mf6/wel.py
@@ -37,7 +37,7 @@
from imod.mf6.validation_context import ValidationContext
from imod.mf6.write_context import WriteContext
from imod.prepare import assign_wells
from imod.prepare.cleanup import cleanup_wel
from imod.prepare.cleanup import cleanup_wel, cleanup_wel_layered
from imod.schemata import (
AllValueSchema,
AnyNoDataSchema,
@@ -946,6 +946,25 @@ def clip_box(
        z_max = self._find_well_value_at_layer(ds, top, layer_max)
        z_min = self._find_well_value_at_layer(ds, bottom, layer_min)

        if (z_max is not None) or (z_min is not None):
            in_bounds_z_max = None
            in_bounds_z_min = None
            if (z_max is not None) and (is_spatial_grid(top)):
                in_bounds_z_max = np.full(ds.sizes["index"], False)
                in_bounds_z_max[z_max["index"]] = True
                z_max = z_max.drop_vars(lambda x: x.coords)
            if (z_min is not None) and (is_spatial_grid(bottom)):
                in_bounds_z_min = np.full(ds.sizes["index"], False)
                in_bounds_z_min[z_min["index"]] = True
                z_min = z_min.drop_vars(lambda x: x.coords)
            if (in_bounds_z_max is not None) or (in_bounds_z_min is not None):
                in_bounds = np.full(ds.sizes["index"], False)
                if in_bounds_z_max is not None:
                    in_bounds = in_bounds | in_bounds_z_max
                if in_bounds_z_min is not None:
                    in_bounds = in_bounds | in_bounds_z_min
                ds = ds.loc[{"index": in_bounds}]
Comment on lines +949 to +966
Contributor Author commented:

So if I'm not mistaken, this piece of code filters out values that are out of z-bounds? I expected that to be filtered out in the call to values_within_range. This adds quite a bit of extra code complexity, and I sense the issue could be resolved in either _find_well_value_at_layer or values_within_range. Does this code affect the tests? As in: do they run into errors without it?

If not, I think it is best to pick this up in a separate PR. For that, I think it would be good to:

  • Create a separate issue describing what is going wrong, with a small reproducible example
  • Sit together for 15 minutes to come up with the best solution; this prototype code is very useful to see what is required.
  • Add a separate unit test for the test case where this is a problem, reuse reproducible example from the issue here.

Either way, let's discuss on Monday!
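For intuition, the filtering discussed above boils down to OR-combining two boolean masks: one per bound whose lookup landed inside the grid. A toy NumPy sketch (the data and sizes here are made up; only the mask names mirror the diff):

```python
import numpy as np

# Hypothetical miniature of the mask combination: each mask flags wells
# whose z_max / z_min lookup landed inside the grid.
in_bounds_z_max = np.array([True, False, True, False, False])
in_bounds_z_min = np.array([True, True, False, False, False])

# A well is kept if either lookup succeeded (logical OR), mirroring the
# incremental OR-ing in clip_box.
in_bounds = np.full(5, False)
in_bounds = in_bounds | in_bounds_z_max
in_bounds = in_bounds | in_bounds_z_min

kept_indices = np.flatnonzero(in_bounds)
print(kept_indices.tolist())  # → [0, 1, 2]
```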


        if z_max is not None:
            ds["screen_top"] = ds["screen_top"].clip(None, z_max)
        if z_min is not None:
@@ -979,7 +998,7 @@ def _find_well_value_at_layer(
x=well_dataset["x"].values,
y=well_dataset["y"].values,
out_of_bounds="ignore",
).drop_vars(lambda x: x.coords)
)

return value

@@ -1395,6 +1414,40 @@ def from_imod5_cap_data(cls, imod5_data: Imod5DataDict):
        data = well_from_imod5_cap_data(imod5_data)
        return cls(**data)  # type: ignore

    @standard_log_decorator()
    def cleanup(self, dis: StructuredDiscretization | VerticesDiscretization):
        """
        Clean up package in place. This method calls
        :func:`imod.prepare.cleanup_wel_layered`; see the documentation of
        that function for details on the cleanup.

        Parameters
        ----------
        dis: imod.mf6.StructuredDiscretization | imod.mf6.VerticesDiscretization
            Model discretization package.
        """
        # Top and bottom should be forced to grids with x, y coordinates
        top, bottom = broadcast_to_full_domain(**dict(dis.dataset.data_vars))
        # Collect point variable datanames
        point_varnames = list(self._write_schemata.keys())
        if "concentration" not in self.dataset.keys():
            point_varnames.remove("concentration")
        point_varnames.append("id")
        # Create dataset with purely point locations
        point_ds = self.dataset[point_varnames]
        # Take first item of irrelevant dimensions
        point_ds = point_ds.isel(time=0, species=0, drop=True, missing_dims="ignore")
        # Clean up well dataframe
        wells = point_ds.to_dataframe()
        cleaned_wells = cleanup_wel_layered(wells, top.isel(layer=0), bottom)
        # Select ids present in the cleaned dataframe to drop points outside the grid.
        well_ids = cleaned_wells.index
        dataset_cleaned = self.dataset.swap_dims({"index": "id"}).sel(id=well_ids)
        # Ensure dtype of id is preserved
        id_type = self.dataset["id"].dtype
        dataset_cleaned = dataset_cleaned.swap_dims({"id": "index"}).reset_coords("id")
        dataset_cleaned["id"] = dataset_cleaned["id"].astype(id_type)
        # Override dataset
        self.dataset = dataset_cleaned
Comment on lines +1417 to +1449
Contributor Author commented:

Nice! I see you duplicated the logic in the Well package. I think we can reduce code duplication here by adding a semi-private method to the GridAgnosticWell class. Could you try whether this works?

class GridAgnosticWell(BoundaryCondition, IPointDataPackage, abc.ABC):
...
    def _cleanup(self, 
            dis: StructuredDiscretization | VerticesDiscretization,
            cleanup_func: Callable, 
            **cleanup_kwargs
        ) -> None:
        # Top and bottom should be forced to grids with a x, y coordinates
        top, bottom = broadcast_to_full_domain(**dict(dis.dataset.data_vars))
        # Collect point variable datanames
        point_varnames = list(self._write_schemata.keys())
        if "concentration" not in self.dataset.keys():
            point_varnames.remove("concentration")
        point_varnames.append("id")
        # Create dataset with purely point locations
        point_ds = self.dataset[point_varnames]
        # Take first item of irrelevant dimensions
        point_ds = point_ds.isel(time=0, species=0, drop=True, missing_dims="ignore")
        # Cleanup well dataframe
        wells = point_ds.to_dataframe()
        cleaned_wells = cleanup_func(wells, top.isel(layer=0), bottom, **cleanup_kwargs)
        # Select with ids in cleaned dataframe to drop points outside grid.
        well_ids = cleaned_wells.index
        dataset_cleaned = self.dataset.swap_dims({"index": "id"}).sel(id=well_ids)
        # Ensure dtype of id is preserved
        id_type = self.dataset["id"].dtype
        dataset_cleaned = dataset_cleaned.swap_dims({"id": "index"}).reset_coords("id")
        dataset_cleaned["id"] = dataset_cleaned["id"].astype(id_type)
        # Override dataset
        self.dataset = dataset_cleaned

class Well(GridAgnosticWell):
    ...
    def cleanup(self, dis: StructuredDiscretization | VerticesDiscretization):
        minimum_thickness = float(self.dataset["minimum_thickness"])
        self._cleanup(dis, cleanup_wel, minimum_thickness=minimum_thickness)

class LayeredWell(GridAgnosticWell):
    ...
    def cleanup(self, dis: StructuredDiscretization | VerticesDiscretization):
        self._cleanup(dis, cleanup_wel_layered)
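As a quick sanity check that the callable-injection pattern works independently of the well packages, here is a stripped-down toy version (all names and data below are illustrative, not imod's API):

```python
from typing import Callable

class GridAgnosticWellToy:
    """Toy base class: one shared semi-private _cleanup taking a Callable."""

    def __init__(self, wells: list[dict]):
        self.wells = wells

    def _cleanup(self, cleanup_func: Callable, **cleanup_kwargs) -> None:
        # Delegate to the injected cleanup function, as in the suggestion.
        self.wells = cleanup_func(self.wells, **cleanup_kwargs)

def toy_cleanup_wel(wells: list[dict], minimum_thickness: float = 0.0) -> list[dict]:
    return [w for w in wells if w["thickness"] >= minimum_thickness]

def toy_cleanup_wel_layered(wells: list[dict]) -> list[dict]:
    return [w for w in wells if w["layer"] is not None]

class WellToy(GridAgnosticWellToy):
    def cleanup(self) -> None:
        self._cleanup(toy_cleanup_wel, minimum_thickness=1.0)

class LayeredWellToy(GridAgnosticWellToy):
    def cleanup(self) -> None:
        self._cleanup(toy_cleanup_wel_layered)

well = WellToy([{"thickness": 0.5}, {"thickness": 2.0}])
well.cleanup()
print(well.wells)  # → [{'thickness': 2.0}]
```

The subclasses stay one-liners; only the cleanup function and its keyword arguments differ, which is exactly what the suggested `_cleanup(dis, cleanup_func, **cleanup_kwargs)` signature captures.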



class WellDisStructured(DisStructuredBoundaryCondition):
"""
32 changes: 32 additions & 0 deletions imod/prepare/cleanup.py
@@ -287,6 +287,38 @@ def _set_ultrathin_filters_to_point_filters(
    return cleaned_wells


def cleanup_wel_layered(
    wells: pd.DataFrame, top: GridDataArray, bottom: GridDataArray
) -> pd.DataFrame:
    """
    Clean up dataframe with wells; fixes some common mistakes in the
    following order:

    1. Wells outside grid bounds are dropped

    Parameters
    ----------
    wells: pandas.DataFrame
        Dataframe with wells to be cleaned up. Requires columns ``"x", "y",
        "id"``
    top: xarray.DataArray | xugrid.UgridDataArray
        Grid with model top
    bottom: xarray.DataArray | xugrid.UgridDataArray
        Grid with model bottoms

    Returns
    -------
    pandas.DataFrame
        Cleaned well dataframe.
    """
    validate_well_columnnames(wells, names={"x", "y", "id"})

    cleaned_wells, _, _ = _locate_wells_in_bounds(wells, top, bottom)
    return cleaned_wells
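The only cleanup step here is dropping wells that fall outside the grid. A self-contained pandas sketch of that step (the rectangular-extent check below is a hypothetical stand-in for `_locate_wells_in_bounds`, whose internals are not shown in this diff):

```python
import pandas as pd

def drop_wells_outside_bounds(
    wells: pd.DataFrame, xmin: float, xmax: float, ymin: float, ymax: float
) -> pd.DataFrame:
    # Keep only wells whose (x, y) fall inside the rectangular grid extent.
    inside = wells["x"].between(xmin, xmax) & wells["y"].between(ymin, ymax)
    return wells.loc[inside]

wells = pd.DataFrame(
    {"id": ["w1", "w2", "w3"], "x": [5.0, 50.0, 500.0], "y": [5.0, 50.0, 5.0]}
)
cleaned = drop_wells_outside_bounds(wells, 0.0, 100.0, 0.0, 100.0)
print(list(cleaned["id"]))  # → ['w1', 'w2']
```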


def cleanup_wel(
    wells: pd.DataFrame,
    top: GridDataArray,
4 changes: 2 additions & 2 deletions imod/prepare/wells.py
@@ -97,7 +97,7 @@ def locate_wells(

    # Default to a xy_k value of 1.0: weigh every layer equally.
    xy_k: float | GridDataArray = 1.0
    first = wells.groupby("id").first()
    first = wells.groupby("id", sort=False).first()
    x = first["x"].to_numpy()
    y = first["y"].to_numpy()
@@ -193,7 +193,7 @@ def assign_wells(
        wells, top, bottom, k, validate
    )
    wells_in_bounds = wells.set_index("id").loc[id_in_bounds].reset_index()
    first = wells_in_bounds.groupby("id").first()
    first = wells_in_bounds.groupby("id", sort=False).first()
    overlap = compute_overlap(first, xy_top, xy_bottom)

    if isinstance(xy_k, (xr.DataArray, xu.UgridDataArray)):
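The `sort=False` change matters because pandas sorts group keys by default, and the well ids are strings, so lexical order diverges from insertion order once ids reach two digits. A minimal demonstration:

```python
import pandas as pd

# Default groupby sorts keys lexically: "10" < "2" for strings, so the
# groups no longer line up with the original well order downstream.
wells = pd.DataFrame({"id": ["2", "10", "1"], "x": [20.0, 100.0, 10.0]})

sorted_first = wells.groupby("id").first()
stable_first = wells.groupby("id", sort=False).first()

print(list(sorted_first.index))  # → ['1', '10', '2']
print(list(stable_first.index))  # → ['2', '10', '1']
```

With `sort=False` the first-occurrence order of the ids is preserved, which is what the downstream positional alignment with `compute_overlap` relies on.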
72 changes: 53 additions & 19 deletions imod/tests/fixtures/mf6_welltest_fixture.py
@@ -10,18 +10,22 @@ def mf6wel_test_data_stationary():
[
[1, 1, 9],
[1, 2, 9],
[1, 3, 9],
[1, 1, 8],
[1, 2, 8],
[2, 3, 7],
[1, 3, 8],
[2, 4, 7],
[2, 3, 6],
[2, 5, 7],
[2, 6, 7],
[2, 4, 6],
[2, 5, 6],
[2, 6, 6],
],
)
coords = {"ncellid": np.arange(8) + 1, "dim_cellid": ["layer", "row", "column"]}
coords = {"ncellid": np.arange(12) + 1, "dim_cellid": ["layer", "row", "column"]}
cellid = xr.DataArray(cellid_values, coords=coords, dims=("ncellid", "dim_cellid"))
rate = xr.DataArray(
[1.0] * 8, coords={"ncellid": np.arange(8) + 1}, dims=("ncellid",)
[1.0] * 12, coords={"ncellid": np.arange(12) + 1}, dims=("ncellid",)
)
return cellid, rate

@@ -32,18 +36,22 @@ def mf6wel_test_data_transient():
[
[1, 1, 9],
[1, 2, 9],
[1, 3, 9],
[1, 1, 8],
[1, 2, 8],
[2, 3, 7],
[1, 3, 8],
[2, 4, 7],
[2, 3, 6],
[2, 5, 7],
[2, 6, 7],
[2, 4, 6],
[2, 5, 6],
[2, 6, 6],
],
)
coords = {"ncellid": np.arange(8) + 1, "dim_cellid": ["layer", "row", "column"]}
coords = {"ncellid": np.arange(12) + 1, "dim_cellid": ["layer", "row", "column"]}
cellid = xr.DataArray(cellid_values, coords=coords, dims=("ncellid", "dim_cellid"))

rate = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
rate = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

globaltimes = pd.date_range("2000-01-01", "2000-01-06")
weltimes = globaltimes[:-1]
@@ -52,7 +60,7 @@
np.arange(len(weltimes)) + 1, coords={"time": weltimes}, dims=("time",)
)
rate_cells = xr.DataArray(
rate, coords={"ncellid": np.arange(8) + 1}, dims=("ncellid",)
rate, coords={"ncellid": np.arange(12) + 1}, dims=("ncellid",)
)
rate_wel = rate_time * rate_cells

@@ -61,13 +69,26 @@

@pytest.fixture(scope="session")
def well_high_lvl_test_data_stationary():
Contributor Author commented:
Please add a comment explaining why we need 12 wells: to also test that there are no sorting issues caused by the default-assigned string "id" values. Otherwise somebody might decide to clean this up later and reduce it to 5 wells or something ;-)
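The reason 12 wells (rather than 8) catch the bug: default ids are strings, and lexical order only diverges from numeric order once two-digit ids appear. A quick check:

```python
# With fewer than 10 wells, string ids sort the same as integers, so the
# groupby sorting bug stays invisible; with 12 ids, "10" sorts before "2".
ids_8 = [str(i) for i in range(8)]
ids_12 = [str(i) for i in range(12)]

print(sorted(ids_8) == ids_8)    # → True
print(sorted(ids_12) == ids_12)  # → False
```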

screen_top = [0.0, 0.0, 0.0, 0.0, -6.0, -6.0, -6.0, -6.0]
screen_bottom = [-2.0, -2.0, -2.0, -2.0, -20.0, -20.0, -20.0, -20.0]
screen_top = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0]
screen_bottom = [
-2.0,
-2.0,
-2.0,
-2.0,
-2.0,
-2.0,
-20.0,
-20.0,
-20.0,
-20.0,
-20.0,
-20.0,
]
# fmt: off
y = [83.0, 77.0, 82.0, 71.0, 62.0, 52.0, 66.0, 59.0]
x = [81.0, 82.0, 75.0, 77.0, 68.0, 64.0, 52.0, 51.0]
y = [83.0, 77.0, 66.0, 82.0, 71.0, 61.0, 59.0, 44.0, 33.0, 52.0, 41.0, 38.0]
x = [81.0, 82.0, 88.0, 75.0, 77.0, 71.0, 68.0, 64.0, 61.0, 52.0, 51.0, 59.0]
# fmt: on
rate = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
rate = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
rate_wel = xr.DataArray(rate, dims=("index",))

da_species = xr.DataArray(
@@ -83,13 +104,26 @@

@pytest.fixture(scope="session")
def well_high_lvl_test_data_transient():
screen_top = [0.0, 0.0, 0.0, 0.0, -6.0, -6.0, -6.0, -6.0]
Contributor Author commented: see my previous comment, the same applies here.
screen_bottom = [-2.0, -2.0, -2.0, -2.0, -20.0, -20.0, -20.0, -20.0]
screen_top = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -6.0, -6.0, -6.0, -6.0, -6.0, -6.0]
screen_bottom = [
-2.0,
-2.0,
-2.0,
-2.0,
-2.0,
-2.0,
-20.0,
-20.0,
-20.0,
-20.0,
-20.0,
-20.0,
]
# fmt: off
y = [83.0, 77.0, 82.0, 71.0, 62.0, 52.0, 66.0, 59.0]
x = [81.0, 82.0, 75.0, 77.0, 68.0, 64.0, 52.0, 51.0]
y = [83.0, 77.0, 66.0, 82.0, 71.0, 61.0, 59.0, 44.0, 33.0, 52.0, 41.0, 38.0]
x = [81.0, 82.0, 88.0, 75.0, 77.0, 71.0, 68.0, 64.0, 61.0, 52.0, 51.0, 59.0]
# fmt: on
rate = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
rate = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

globaltimes = pd.date_range("2000-01-01", "2000-01-06")
weltimes = globaltimes[:-1]