
Fix Dimension Mismatch in ETD Calculation Error #393

Merged · 6 commits · Jan 23, 2025
@@ -49,6 +49,7 @@
"import ecoscope\n",
"from ecoscope.analysis.UD import calculate_etd_range\n",
"from ecoscope.analysis.percentile import get_percentile_area\n",
"from ecoscope.io.raster import RasterData\n",
"\n",
"ecoscope.init()"
]
@@ -255,7 +256,12 @@
"outputs": [],
"source": [
"percentiles = pd.concat(\n",
" [get_percentile_area(percentile_levels=[50, 99.9, 100.0], raster_path=v, subject_id=k) for k, v in etd.items()]\n",
" [\n",
" get_percentile_area(\n",
" percentile_levels=[50, 99.9, 100.0], raster_data=RasterData.from_raster_file(v), subject_id=k\n",
" )\n",
" for k, v in etd.items()\n",
" ]\n",
").reset_index(drop=True)\n",
"\n",
"percentiles"
@@ -301,7 +307,7 @@
"source": [
"salif = get_percentile_area(\n",
" percentile_levels=[50, 60, 70, 80, 90, 99.9],\n",
" raster_path=etd.at[\"Salif Keita\"],\n",
" raster_data=RasterData.from_raster_file(etd.at[\"Salif Keita\"]),\n",
" subject_id=\"Salif Keita\",\n",
")"
]
3 changes: 2 additions & 1 deletion doc/source/notebooks/04. EcoMap & EcoPlot/EcoMap.ipynb
@@ -496,9 +496,10 @@
"metadata": {},
"outputs": [],
"source": [
"raster_data = ecoscope.io.raster.RasterData.from_raster_file(etd.at[\"Salif Keita\"])\n",
"percentile_areas = get_percentile_area(\n",
" percentile_levels=[50, 60, 70, 80, 90, 99.9],\n",
" raster_path=etd.at[\"Salif Keita\"],\n",
" raster_data=raster_data,\n",
" subject_id=\"Salif Keita\",\n",
").to_crs(4326)\n",
"\n",
47 changes: 35 additions & 12 deletions ecoscope/analysis/UD/etd_range.py
@@ -1,24 +1,28 @@
import logging
import math
import os
import typing
from dataclasses import dataclass

import numpy as np

from ecoscope.base import Trajectory
from ecoscope.io import raster

try:
import numba as nb
import scipy
from sklearn import neighbors
from scipy.optimize import minimize
from scipy.stats import weibull_min
from sklearn import neighbors
except ModuleNotFoundError:
raise ModuleNotFoundError(
'Missing optional dependencies required by this module. \
Please run pip install ecoscope["analysis"]'
)

logger = logging.getLogger(__name__)


@nb.cfunc("double(intc, CPointer(double))")
def __etd__(_, a):
@@ -84,13 +88,14 @@ class Weibull3Parameter(WeibullPDF):

def calculate_etd_range(
trajectory_gdf: Trajectory,
output_path: typing.Union[str, bytes, os.PathLike],
output_path: typing.Union[str, bytes, os.PathLike, None] = None,
max_speed_kmhr: float = 0.0,
max_speed_percentage: float = 0.9999,
raster_profile: raster.RasterProfile = None,
expansion_factor: float = 1.3,
weibull_pdf: typing.Union[Weibull2Parameter, Weibull3Parameter] = Weibull2Parameter(),
) -> None:
grid_threshold: int = 100,
) -> raster.RasterData:
"""
The ETDRange class provides a trajectory-based, nonparametric approach to estimate the utilization distribution (UD)
of an animal, using model parameters derived directly from the movement behaviour of the species.
@@ -100,12 +105,13 @@ def calculate_etd_range(
Parameters
----------
trajectory_gdf : geopandas.GeoDataFrame
output_path : str or PathLike
output_path : str or PathLike or None
max_speed_kmhr : float
max_speed_percentage : 0.999
raster_profile : raster.RasterProfile
expansion_factor : float
weibull_pdf : Weibull2Parameter or Weibull3Parameter
grid_threshold: int

Returns
-------
@@ -169,8 +175,20 @@ def calculate_etd_range(
grid_centroids[1, 0] = y_max - raster_profile.pixel_size * 0.5

centroids_coords = np.dot(grid_centroids, np.mgrid[1:2, :num_columns, :num_rows].T.reshape(-1, 3, 1))
centroids_coords = centroids_coords.squeeze().T

if centroids_coords.size < grid_threshold:
logger.warning(
f"Centroid size {centroids_coords.size} is too small to calculate density. "
f"The threshold value is {grid_threshold}. "
"Check if there’s a data issue or decrease pixel size"
)
return raster.RasterData(data=np.array([]), crs=raster_profile.crs, transform=raster_profile.transform)

if centroids_coords.ndim != 2:
centroids_coords = centroids_coords.reshape(1, -1)

tr = neighbors.KDTree(centroids_coords.squeeze().T)
tr = neighbors.KDTree(centroids_coords)

del centroids_coords

@@ -206,12 +224,17 @@ def calculate_etd_range(
# Normalize the grid values
raster_ndarray = raster_ndarray / raster_ndarray.sum()

# Set the null data values
raster_ndarray[raster_ndarray == 0] = raster_profile.nodata_value
ndarray = raster_ndarray.reshape(num_rows, num_columns)

# write raster_ndarray to GeoTIFF file.
raster.RasterPy.write(
ndarray=raster_ndarray.reshape(num_rows, num_columns),
fp=output_path,
**raster_profile,
)
if output_path:
# Set the null data values
raster_ndarray[np.isnan(raster_ndarray) | (raster_ndarray == 0)] = raster_profile.nodata_value

raster.RasterPy.write(
ndarray,
fp=output_path,
**raster_profile,
)

return raster.RasterData(data=ndarray.astype("float32"), crs=raster_profile.crs, transform=raster_profile.transform)
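
Reviewer note, not part of this diff: with output_path now optional, calculate_etd_range can run fully in memory and hand its result straight to downstream analysis. A minimal sketch of the new call pattern, assuming an existing Trajectory (trajectory) and RasterProfile (profile); the keyword values are illustrative only.

from ecoscope.analysis.UD import calculate_etd_range

# Sketch only: trajectory and profile are assumed to exist already
# (an ecoscope.base.Trajectory and a raster.RasterProfile for the area of interest).
etd_raster = calculate_etd_range(
    trajectory_gdf=trajectory,
    raster_profile=profile,
    max_speed_kmhr=5.0,      # illustrative value
    grid_threshold=100,      # below this many grid centroids an empty RasterData is returned
)
# No output_path was given, so nothing is written to disk; the density grid,
# CRS and transform live on the returned RasterData.
print(etd_raster.data.shape, etd_raster.crs)
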
108 changes: 36 additions & 72 deletions ecoscope/analysis/percentile.py
@@ -1,6 +1,4 @@
import os
import typing
from dataclasses import dataclass, field

import geopandas as gpd
import numpy as np
@@ -9,83 +7,25 @@
from shapely.geometry import shape
from shapely.geometry.multipolygon import MultiPolygon

from ecoscope.io import raster

@dataclass
class PercentileAreaProfile:
input_raster: typing.Union[str, bytes, os.PathLike]
percentile_levels: typing.List = field(default_factory=[50.0])
subject_id: str = ""


class PercentileArea:
@staticmethod
def _multipolygon(shapes, percentile):
return MultiPolygon([shape(geom) for geom, value in shapes if np.isclose(value, percentile)])

@classmethod
def calculate_percentile_area(cls, profile: PercentileAreaProfile):
"""
Parameters
----------
profile: PercentileAreaProfile
dataclass object with information for percentile-area calculation

Returns
-------
GeodataFrame
"""

assert type(profile) is PercentileAreaProfile
shapes = []

# open raster
with rasterio.open(profile.input_raster) as src:
crs = src.crs.to_wkt()

for percentile in profile.percentile_levels:
data_array = src.read(1).astype(np.float32)

# Mask no-data values
data_array[data_array == src.nodata] = np.nan

# calculate percentile value
# percentile_val = np.percentile(data_array[~np.isnan(data_array)], 100.0 - percentile)
values = np.sort(data_array[~np.isnan(data_array)]).flatten()
csum = np.cumsum(values)
percentile_val = values[np.argmin(np.abs(csum[-1] * (1 - percentile / 100) - csum))]

# TODO: make a more explicit comparison for less than and greater than

# Set any vals less than the cutoff to be nan
data_array[data_array < percentile_val] = np.nan

# Mask any vals that are less than the cutoff percentile
data_array[data_array >= percentile_val] = percentile

shapes.extend(rasterio.features.shapes(data_array, transform=src.transform))

return gpd.GeoDataFrame(
[
[profile.subject_id, percentile, cls._multipolygon(shapes, percentile)]
for percentile in sorted(profile.percentile_levels, reverse=True)
],
columns=["subject_id", "percentile", "geometry"],
crs=crs,
)
def _multipolygon(shapes, percentile):
return MultiPolygon([shape(geom) for geom, value in shapes if np.isclose(value, percentile)])


def get_percentile_area(
percentile_levels: typing.List,
raster_path: typing.Union[str, bytes, os.PathLike],
raster_data: raster.RasterData,
subject_id: str = "",
):
) -> gpd.GeoDataFrame:
"""
Parameters
----------
percentile_levels: Typing.List[Int]
list of k-th percentile scores.
raster_path: str or os.PathLike
file path to where the raster is stored.
raster_data: raster.RasterData
array of raster values
subject_id: str
unique identifier for the subject

@@ -94,9 +34,33 @@ def get_percentile_area(
GeoDataFrame

"""
percentile_profile = PercentileAreaProfile(
percentile_levels=percentile_levels,
input_raster=raster_path,
subject_id=subject_id,
shapes = []
for percentile in percentile_levels:
data_array = raster_data.data.copy()

# calculate percentile value
values = np.sort(data_array[~np.isnan(data_array)]).flatten()
if len(values) == 0:
continue

csum = np.cumsum(values)
percentile_val = values[np.argmin(np.abs(csum[-1] * (1 - percentile / 100) - csum))]

# TODO: make a more explicit comparison for less than and greater than

# Set any vals less than the cutoff to be nan
data_array[data_array < percentile_val] = np.nan

# Mask any vals that are less than the cutoff percentile
data_array[data_array >= percentile_val] = percentile

shapes.extend(rasterio.features.shapes(data_array, transform=raster_data.transform))

return gpd.GeoDataFrame(
[
[subject_id, percentile, _multipolygon(shapes, percentile)]
for percentile in sorted(percentile_levels, reverse=True)
],
columns=["subject_id", "percentile", "geometry"],
crs=raster_data.crs,
)
return PercentileArea.calculate_percentile_area(profile=percentile_profile)
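
Illustrative usage, not part of this diff: because get_percentile_area now takes an in-memory RasterData instead of a file path, it can be exercised without writing a raster to disk. The toy array, CRS and identity transform below are invented for the example; real inputs come from calculate_etd_range or RasterData.from_raster_file.

import numpy as np
import rasterio as rio

from ecoscope.analysis.percentile import get_percentile_area
from ecoscope.io.raster import RasterData

# Tiny stand-in for an ETD density surface (values sum to 1 like a normalized UD grid).
density = np.array([[0.05, 0.10], [0.25, 0.60]], dtype="float32")
raster_data = RasterData(data=density, crs="EPSG:4326", transform=rio.Affine.identity())

areas = get_percentile_area(percentile_levels=[50.0, 99.9], raster_data=raster_data, subject_id="demo")
print(areas[["subject_id", "percentile"]])
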
2 changes: 1 addition & 1 deletion ecoscope/io/earthranger.py
@@ -1186,7 +1186,7 @@ def upload(obs):
"records"
)
del obs["geometry"]
obs = pack_columns(obs, columns=["source", "recorded_at", "location"])
obs = pack_columns(obs, columns=["source", "recorded_at", "location", "exclusion_flags", "additional"])
post_data = obs.to_dict("records")
results = super(EarthRangerIO, self).post_observation(post_data)
except ERClientException as exc:
26 changes: 26 additions & 0 deletions ecoscope/io/raster.py
@@ -2,6 +2,7 @@
import math
import os
from collections import UserDict
from dataclasses import dataclass

import geopandas as gpd
import numpy as np
@@ -102,6 +103,20 @@ def __setitem__(self, key, item):
self._recompute_transform_(key)


@dataclass
class RasterData:
data: np.ndarray
crs: str
transform: rio.Affine

@classmethod
def from_raster_file(cls, raster_path):
with rasterio.open(raster_path) as src:
data_array = src.read(1).astype(np.float32)
data_array[data_array == src.nodata] = np.nan
return cls(data_array, src.crs.to_wkt(), src.transform)


class RasterPy:
@classmethod
def write(
Expand Down Expand Up @@ -249,3 +264,14 @@ def grid_to_raster(grid=None, val_column="", out_dir="", raster_name=None, xlen=
).write(vals, 1)

return memfile


def raster_to_grid(raster_path):
with rasterio.open(raster_path) as src:
data_array = src.read(1).astype(np.float32)
data_array[data_array == src.nodata] = np.nan


def get_crs(raster_path):
with rasterio.open(raster_path) as src:
return src.crs.to_wkt()
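
Usage note, not part of this diff: RasterData.from_raster_file is the bridge for workflows that still have an ETD GeoTIFF on disk; it reads band 1 as float32, turns the nodata value into NaN, and keeps the CRS and transform next to the array. A short sketch with a placeholder path:

from ecoscope.io.raster import RasterData

# "etd_salif.tif" is a placeholder; any single-band GeoTIFF written by
# calculate_etd_range (with an output_path) would work here.
raster_data = RasterData.from_raster_file("etd_salif.tif")
print(raster_data.data.dtype, raster_data.crs, raster_data.transform)
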
2 changes: 1 addition & 1 deletion ecoscope/plotting/plot.py
@@ -422,7 +422,7 @@ def draw_historic_timeseries(
The title shown in the plot legend for historic band
historic_mean_column: str
The name of the dataframe column to pull historic mean values from
current_value_title: str
historic_mean_title: str
The title shown in the plot legend for historic mean values
layout_kwargs: dict
Additional kwargs passed to plotly.go.Figure(layout)