add elevation and slope properties (#151)

### Pull Request Checklist: - [ ] This PR addresses an already opened issue (for bug fixes / features) - [X] (If applicable) Documentation has been added / updated (for bug fixes / features). - [X] (If applicable) Tests have been added. - [X] CHANGELOG.rst has been updated (with summary of main changes). - [X] Link to issue (:issue:`number`) and pull request (:pull:`number`) has been added. ### What kind of change does this PR introduce? * This PR adds functions in the GIS module to extract elevation, slope and aspect ratio from a set of basins. * This also fixes a small bug (of lack of feature) in the current GIS module that limited the land use classification to the bounding box of a watershed rather than its boundaries. ### Does this PR introduce a breaking change? No ### Other information:
hydrologie · Jun 10, 2024 · 3edfe44 · 3edfe44
2 parents 2e01e37 + 1097f9f
commit 3edfe44
Show file tree

Hide file tree

Showing 8 changed files with 920 additions and 178 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -128,7 +128,7 @@ jobs:
         shell: bash -l {0}
     steps:
       - name: Harden Runner
-        uses: step-security/harden-runner@f086349bfa2bd1361f7909c78558e816508cdc10 # v2.8.0
+        uses: step-security/harden-runner@17d0e2bd7d51742c71671bd19fa12bdc9d40a3d6 # v2.8.1
         with:
           disable-sudo: true
           egress-policy: block
@@ -137,6 +137,7 @@ jobs:
             cdn.proj.org:443
             conda.anaconda.org:443
             coveralls.io:443
+            elevationeuwest.blob.core.windows.net:443
             files.pythonhosted.org:443
             github.com:443
             objects.githubusercontent.com:443

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -4,14 +4,15 @@ Changelog
 
 v0.4.0 (unreleased)
 -------------------
-Contributors to this version: Gabriel Rondeau-Genesse (:user:`RondeauG`), Richard Arsenault (:user:`richardarsenault`).
+Contributors to this version: Gabriel Rondeau-Genesse (:user:`RondeauG`), Richard Arsenault (:user:`richardarsenault`), Sébastien Langlois (:user:`sebastienlanglois`).
 
 New features and enhancements
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 * Added support for the Hydrotel hydrological model. (:pull:`18`).
 * Added support for various hydrological models emulated through the Raven hydrological framework. (:pull:`128`).
 * Added optimal interpolation functions for time-series and streamflow indicators. (:pull:`88`, :pull:`129`).
 * Added optimal interpolation notebooks. (:pull:`123`).
+* Added surface properties (elevation, slope, aspect ratio) to the `gis` module. (:pull:`151`).
 
 Breaking changes
 ^^^^^^^^^^^^^^^^

diff --git a/docs/notebooks/gis.ipynb b/docs/notebooks/gis.ipynb
diff --git a/environment-dev.yml b/environment-dev.yml
@@ -19,14 +19,17 @@ dependencies:
   - pydantic >=2.0,<2.5.3  # FIXME: Remove pin once our dependencies (xclim, xscen) support pydantic 2.5.3
   - pyyaml
   - rasterio
+  - ravenpy
+  - rioxarray
   - spotpy
   - stackstac
   - statsmodels
-  - ravenpy
   - tqdm
   - xarray >=2023.11.0
+  - xarray-spatial
   - xclim >=0.48.2
   - xscen >=0.8.3
+  - xvec
   - pip >=23.3.0
   - pip:
     - xdatasets >=0.3.5

diff --git a/environment.yml b/environment.yml
@@ -19,14 +19,17 @@ dependencies:
   - pystac-client
   - pyyaml
   - rasterio
+  - ravenpy
+  - rioxarray
   - spotpy
   - stackstac
   - statsmodels
-  - ravenpy
   - tqdm
   - xarray >=2023.11.0
+  - xarray-spatial
   - xclim >=0.48.2
   - xscen >=0.8.3
+  - xvec
   - pip >=23.3.0
   - pip:
     - xdatasets >=0.3.5
diff --git a/pyproject.toml b/pyproject.toml
@@ -53,14 +53,17 @@ dependencies = [
   "pyyaml",
   "rasterio",
   "ravenpy",
+  "rioxarray",
   "spotpy",
   "stackstac",
   "statsmodels",
   "tqdm",
   "xarray>=2023.11.0",
+  "xarray-spatial",
   "xclim>=0.48.2",
   "xdatasets>=0.3.5",
-  "xscen>=0.8.3"
+  "xscen>=0.8.3",
+  "xvec"
 ]
 
 [project.optional-dependencies]

diff --git a/src/xhydro/gis.py b/src/xhydro/gis.py
@@ -17,17 +17,21 @@
 import pystac_client
 import rasterio
 import rasterio.features
+import rioxarray  # noqa: F401
 import stackstac
 import xarray as xr
+import xvec  # noqa: F401
 from matplotlib.colors import ListedColormap
 from pystac.extensions.item_assets import ItemAssetsExtension
 from pystac.extensions.projection import ProjectionExtension as proj
 from shapely import Point
 from tqdm.auto import tqdm
+from xrspatial import aspect, slope
 
 __all__ = [
     "land_use_classification",
     "land_use_plot",
+    "surface_properties",
     "watershed_delineation",
     "watershed_properties",
 ]
@@ -264,6 +268,122 @@ def _recursive_upstream_lookup(
     return all_upstream_indexes
 
 
+def _flatten(x, dim="time"):
+    assert isinstance(x, xr.DataArray)
+    if len(x[dim].values) > len(set(x[dim].values)):
+        x = x.groupby(dim).map(stackstac.mosaic)
+
+    return x
+
+
+def surface_properties(
+    gdf: gpd.GeoDataFrame,
+    unique_id: str = None,
+    projected_crs: int = 6622,
+    output_format: str = "geopandas",
+    operation: str = "mean",
+    dataset_date: str = "2021-04-22",
+    collection: str = "cop-dem-glo-90",
+) -> gpd.GeoDataFrame | xr.Dataset:
+    """Surface properties for watersheds.
+
+    Surface properties are calculated using Copernicus's GLO-90 Digital Elevation Model. By default, the dataset
+    has a geographic coordinate system (EPSG: 4326) and this function expects a projected crs for more accurate results.
+
+    The calculated properties are :
+    - elevation (meters)
+    - slope (degrees)
+    - aspect ratio (degrees)
+
+    Parameters
+    ----------
+    gdf : gpd.GeoDataFrame
+        GeoDataFrame containing watershed polygons. Must have a defined .crs attribute.
+    unique_id : str
+        The column name in the GeoDataFrame that serves as a unique identifier.
+    projected_crs : int
+        The projected coordinate reference system (crs) to utilize for calculations, such as determining watershed area.
+    output_format : str
+        One of either `xarray` (or `xr.Dataset`) or `geopandas` (or `gpd.GeoDataFrame`).
+    operation : str
+        Aggregation statistics such as `mean` or `sum`.
+    dataset_date : str
+        Date (%Y-%m-%d) for which to select the imagery from the dataset. Date must be available.
+    collection : str
+        Collection name from the Planetary Computer STAC Catalog. Default is `cop-dem-glo-90`.
+
+    Returns
+    -------
+    gpd.GeoDataFrame or xr.Dataset
+        Output dataset containing the surface properties.
+    """
+    # Geometries are projected to make calculations more accurate
+    projected_gdf = gdf.to_crs(projected_crs)
+
+    catalog = pystac_client.Client.open(
+        "https://planetarycomputer.microsoft.com/api/stac/v1",
+    )
+
+    search = catalog.search(
+        collections=[collection],
+        bbox=gdf.total_bounds,
+    )
+
+    items = list(search.get_items())
+
+    # Create a mosaic of
+    da = stackstac.stack(items)
+    da = _flatten(
+        da, dim="time"
+    )  # https://hrodmn.dev/posts/stackstac/#wrangle-the-time-dimension
+    ds = (
+        da.sel(time=dataset_date)
+        .coarsen({"y": 5, "x": 5}, boundary="trim")
+        .mean()
+        .to_dataset(name="elevation")
+        .rio.write_crs("epsg:4326", inplace=True)
+        .rio.reproject(projected_crs)
+        .isel(band=0)
+    )
+
+    # Use Xvec to extract elevation for each geometry in the projected gdf
+    da_elevation = ds.xvec.zonal_stats(
+        projected_gdf.geometry, x_coords="x", y_coords="y", stats=operation
+    )["elevation"].squeeze()
+
+    da_slope = slope(ds.elevation)
+
+    # Use Xvec to extract slope for each geometry in the projected gdf
+    da_slope = da_slope.to_dataset(name="slope").xvec.zonal_stats(
+        projected_gdf.geometry, x_coords="x", y_coords="y", stats=operation
+    )["slope"]
+
+    da_aspect = aspect(ds.elevation)
+
+    # Use Xvec to extract aspect for each geometry in the projected gdf
+    da_aspect = da_aspect.to_dataset(name="aspect").xvec.zonal_stats(
+        projected_gdf.geometry, x_coords="x", y_coords="y", stats=operation
+    )["aspect"]
+
+    output_dataset = xr.merge([da_elevation, da_slope, da_aspect]).astype("float32")
+
+    # Add attributes for each variable
+    output_dataset["slope"].attrs = {"units": "degrees"}
+    output_dataset["aspect"].attrs = {"units": "degrees"}
+    output_dataset["elevation"].attrs = {"units": "m"}
+
+    if unique_id is not None:
+        output_dataset = output_dataset.assign_coords(
+            {unique_id: ("geometry", gdf[unique_id])}
+        )
+        output_dataset = output_dataset.swap_dims({"geometry": unique_id})
+
+    if output_format in ("geopandas", "gpd.GeoDataFrame"):
+        output_dataset = output_dataset.drop("geometry").to_dataframe()
+
+    return output_dataset
+
+
 def _merge_stac_dataset(catalog, bbox_of_interest, year):
     search = catalog.search(collections=["io-lulc-9-class"], bbox=bbox_of_interest)
     items = search.item_collection()
@@ -309,12 +429,19 @@ def _count_pixels_from_bbox(
 ):
     bbox_of_interest = gdf.iloc[[idx]].total_bounds
 
-    merged, _ = _merge_stac_dataset(catalog, bbox_of_interest, year)
+    merged, item = _merge_stac_dataset(catalog, bbox_of_interest, year)
+    epsg = item.properties["proj:epsg"]
+
+    # Mask with polygon
+    merged = merged.rio.write_crs(epsg).rio.clip([gdf.to_crs(epsg).iloc[idx].geometry])
 
     data = merged.data.ravel()
+    data = data[data != 0]
+
     df = pd.DataFrame(
         pd.value_counts(data, sort=False).rename(values_to_classes) / data.shape[0]
     )
+
     if unique_id is not None:
         column_name = [gdf[unique_id].iloc[idx]]
     else:

diff --git a/tests/test_gis.py b/tests/test_gis.py
@@ -123,23 +123,78 @@ def test_watershed_properties_xarray(self, watershed_properties_data):
 
         xr.testing.assert_allclose(ds_properties, output_dataset)
 
+    @pytest.fixture
+    def surface_properties_data(self):
+        data = {
+            "elevation": {"031501": 46.3385009765625, "042103": 358.54986572265625},
+            "slope": {"031501": 0.4634914696216583, "042103": 2.5006439685821533},
+            "aspect": {"031501": 241.46539306640625, "042103": 178.55764770507812},
+        }
+
+        df = pd.DataFrame.from_dict(data).astype("float32")
+        df.index.names = ["Station"]
+        return df
+
+    def test_surface_properties(self, surface_properties_data):
+        _properties_name = ["elevation", "slope", "aspect"]
+
+        df_properties = xh.gis.surface_properties(self.gdf)
+        df_properties.index.name = None
+
+        pd.testing.assert_frame_equal(
+            df_properties[_properties_name],
+            surface_properties_data.reset_index(drop=True)[_properties_name],
+        )
+
+    def test_surface_properties_unique_id(self, surface_properties_data):
+        _properties_name = ["elevation", "slope", "aspect"]
+        unique_id = "Station"
+
+        df_properties = xh.gis.surface_properties(self.gdf, unique_id=unique_id)
+
+        pd.testing.assert_frame_equal(
+            df_properties[_properties_name],
+            surface_properties_data[_properties_name],
+        )
+
+    def test_surface_properties_xarray(self, surface_properties_data):
+        unique_id = "Station"
+
+        ds_properties = xh.gis.surface_properties(
+            self.gdf, unique_id=unique_id, output_format="xarray"
+        )
+        ds_properties = ds_properties.drop(
+            list(set(ds_properties.coords) - set(ds_properties.dims))
+        )
+
+        assert ds_properties.elevation.attrs["units"] == "m"
+        assert ds_properties.slope.attrs["units"] == "degrees"
+        assert ds_properties.aspect.attrs["units"] == "degrees"
+
+        output_dataset = surface_properties_data.to_xarray()
+        output_dataset["elevation"].attrs = {"units": "m"}
+        output_dataset["slope"].attrs = {"units": "degrees"}
+        output_dataset["aspect"].attrs = {"units": "degrees"}
+
+        xr.testing.assert_allclose(ds_properties, output_dataset)
+
     @pytest.fixture
     def land_classification_data_latest(self):
         data = {
-            "pct_crops": {"031501": 0.7761508991718495, "042103": 0.0},
             "pct_built_area": {
-                "031501": 0.030159065706857738,
-                "042103": 0.00010067694852579148,
+                "031501": 0.015321084992280073,
+                "042103": 1.291553583975092e-05,
             },
-            "pct_trees": {"031501": 0.1916484013692483, "042103": 0.8636022653195444},
+            "pct_crops": {"031501": 0.7241017020382433, "042103": 0.0},
+            "pct_trees": {"031501": 0.25554784070456893, "042103": 0.8904406091999945},
             "pct_rangeland": {
-                "031501": 0.002041633752044415,
-                "042103": 0.026126172157203968,
+                "031501": 0.005029372264907681,
+                "042103": 0.02405165525507322,
             },
-            "pct_water": {"031501": 0.0, "042103": 0.10998710919246692},
-            "pct_bare_ground": {"031501": 0.0, "042103": 2.142062734591308e-05},
-            "pct_flooded_vegetation": {"031501": 0.0, "042103": 0.00016197774384218392},
-            "pct_snow/ice": {"031501": 0.0, "042103": 3.780110708102308e-07},
+            "pct_water": {"031501": 0.0, "042103": 0.08536996982930828},
+            "pct_snow/ice": {"031501": 0.0, "042103": 3.444142890600245e-07},
+            "pct_bare_ground": {"031501": 0.0, "042103": 1.1193464394450798e-05},
+            "pct_flooded_vegetation": {"031501": 0.0, "042103": 0.00011331230110074807},
         }
 
         df = pd.DataFrame.from_dict(data)
@@ -149,19 +204,19 @@ def land_classification_data_latest(self):
     @pytest.fixture
     def land_classification_data_2018(self):
         data = {
-            "pct_crops": {"031501": 0.7746247733063341, "042103": 0.0},
             "pct_built_area": {
-                "031501": 0.028853606886295277,
-                "042103": 0.0001139703378492846,
+                "031501": 0.0157641813680258,
+                "042103": 3.857440037472275e-05,
             },
-            "pct_trees": {"031501": 0.19468025530620797, "042103": 0.8850292558518161},
+            "pct_crops": {"031501": 0.7236266296353819, "042103": 0.0},
+            "pct_trees": {"031501": 0.2563609453940817, "042103": 0.9106845922823646},
             "pct_rangeland": {
-                "031501": 0.0018413645011626744,
-                "042103": 0.005653344569502407,
+                "031501": 0.004248243602510575,
+                "042103": 0.004328943199195448,
             },
-            "pct_water": {"031501": 0.0, "042103": 0.10902236193791408},
-            "pct_bare_ground": {"031501": 0.0, "042103": 1.8900553540511542e-05},
-            "pct_flooded_vegetation": {"031501": 0.0, "042103": 0.00016216674937758903},
+            "pct_water": {"031501": 0.0, "042103": 0.08475932329480486},
+            "pct_flooded_vegetation": {"031501": 0.0, "042103": 0.0001883946161158334},
+            "pct_bare_ground": {"031501": 0.0, "042103": 1.7220714453001225e-07},
         }
 
         df = pd.DataFrame.from_dict(data)