Merge pull request #47 from DHI/skill-class
Skill class
jsmariegaard authored May 20, 2021
2 parents 291bf4c + ae629d5 commit 118e4d1
Showing 13 changed files with 1,475 additions and 687 deletions; the diffs of three of those files are reproduced below.
6 changes: 6 additions & 0 deletions docs/api.rst
@@ -38,6 +38,12 @@ Compare
    :inherited-members:
    :exclude-members: keys, values, get, items
 
+Skill
+-------------
+.. autoclass:: fmskill.skill.AggregatedSkill
+   :members:
+   :inherited-members:
+
 Spatial Skill
 -------------
 .. autoclass:: fmskill.spatial.SpatialSkill
53 changes: 32 additions & 21 deletions fmskill/compare.py
Expand Up @@ -26,6 +26,7 @@
import fmskill.metrics as mtr
from fmskill.observation import PointObservation, TrackObservation
from fmskill.plot import scatter
from fmskill.skill import AggregatedSkill
from fmskill.spatial import SpatialSkill


@@ -285,12 +286,14 @@ def _get_mod_id(self, model):
            raise ValueError("model must be None, str or int")
        return mod_id
 
-    def _parse_metric(self, metric):
+    def _parse_metric(self, metric, return_list=False):
        if metric is None:
            return [mtr.bias, mtr.rmse, mtr.urmse, mtr.mae, mtr.cc, mtr.si, mtr.r2]
 
        if isinstance(metric, str):
-            valid_metrics = [x[0] for x in getmembers(mtr, isfunction)]
+            valid_metrics = [
+                x[0] for x in getmembers(mtr, isfunction) if x[0][0] != "_"
+            ]
 
            if metric.lower() in valid_metrics:
                metric = getattr(mtr, metric.lower())
@@ -305,6 +308,9 @@ def _parse_metric(self, metric):
            raise ValueError(
                f"Invalid metric: {metric}. Must be either string or callable."
            )
+        if return_list:
+            if callable(metric) or isinstance(metric, str):
+                metric = [metric]
        return metric
 
    def skill(
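Restated as a standalone sketch, the two hunks above (a) hide private helpers from the set of valid metric names and (b) optionally normalize the result to a list. This is a hypothetical simplification, not the full method, which also handles metric=None and bare callables:

    from inspect import getmembers, isfunction
    import fmskill.metrics as mtr

    def parse_metric(metric, return_list=False):
        # Only public functions in fmskill.metrics count as metric names;
        # the new x[0][0] != "_" filter excludes helpers such as _linregress.
        valid_metrics = [
            name for name, _ in getmembers(mtr, isfunction) if not name.startswith("_")
        ]
        if isinstance(metric, str):
            if metric.lower() not in valid_metrics:
                raise ValueError(f"Invalid metric: {metric}. Must be either string or callable.")
            metric = getattr(mtr, metric.lower())
        if return_list and (callable(metric) or isinstance(metric, str)):
            metric = [metric]  # a single metric becomes a one-element list
        return metric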
@@ -318,7 +324,7 @@ def skill(
        end: Union[str, datetime] = None,
        area: List[float] = None,
        df: pd.DataFrame = None,
-    ) -> pd.DataFrame:
+    ) -> AggregatedSkill:
        """Aggregated skill assessment of model(s)
 
        Parameters
@@ -389,7 +395,7 @@ def skill(
        large  324 -0.23  0.38  0.30  0.28  0.96  0.09  0.99
        """
 
-        metrics = self._parse_metric(metrics)
+        metrics = self._parse_metric(metrics, return_list=True)
 
        df = self.sel_df(
            model=model,
@@ -408,7 +414,7 @@ def skill(
 
        res = self._groupby_df(df.drop(columns=["x", "y"]), by, metrics)
        res = self._add_as_field_if_not_in_index(df, skilldf=res)
-        return res
+        return AggregatedSkill(res)
 
    def _add_as_field_if_not_in_index(
        self, df, skilldf, fields=["model", "observation", "variable"]
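skill() now wraps its result in AggregatedSkill rather than returning the DataFrame directly. A hypothetical usage sketch (cc stands for any already-built comparer; the .df attribute is the access path this commit itself uses below):

    ss = cc.skill(metrics=["rmse", "cc"])  # AggregatedSkill object
    df = ss.df                             # underlying pandas DataFrame, as before
    print(df["rmse"])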
@@ -563,9 +569,7 @@ def spatial_skill(
        * y (y) float64 51.5 52.5 53.5 54.5 55.5 56.5
        """
 
-        metrics = self._parse_metric(metrics)
-        if callable(metrics) or isinstance(metrics, str):
-            metrics = [metrics]
+        metrics = self._parse_metric(metrics, return_list=True)
 
        df = self.sel_df(
            model=model,
@@ -928,7 +932,7 @@ def skill(
        end: Union[str, datetime] = None,
        area: List[float] = None,
        df: pd.DataFrame = None,
-    ) -> pd.DataFrame:
+    ) -> AggregatedSkill:
        """Skill assessment of model(s)
 
        Parameters
@@ -959,8 +963,8 @@ def skill(
 
        Returns
        -------
-        pd.DataFrame
-            skill assessment as a dataframe
+        AggregatedSkill
+            skill assessment object
 
        See also
        --------
@@ -1008,7 +1012,7 @@ def score(
        end: Union[str, datetime] = None,
        area: List[float] = None,
        df: pd.DataFrame = None,
-    ) -> pd.DataFrame:
+    ) -> float:
        """Model skill score
 
        Parameters
@@ -1049,6 +1053,8 @@ def score(
        11.567399646108198
        """
        metric = self._parse_metric(metric)
+        if not (callable(metric) or isinstance(metric, str)):
+            raise ValueError("metric must be a string or a function")
 
        df = self.skill(
            metrics=[metric],
@@ -1057,7 +1063,7 @@ def score(
            end=end,
            area=area,
            df=df,
-        )
+        ).df
        values = df[metric.__name__].values
        if len(values) == 1:
            values = values[0]
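With the skill table unwrapped via .df inside the method, score() can honestly annotate its return type as float. Hypothetical call (cc as above):

    rmse = cc.score(metric="rmse")  # a single aggregated number, e.g. 11.567...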
@@ -1195,6 +1201,7 @@ def __init__(self, observation, modeldata):
            else:
                self.df[self.mod_names[j]] = df[self.mod_names[j]]
 
+        self.df.index.name = "datetime"
        self.df.dropna(inplace=True)
 
    def plot_timeseries(
@@ -1306,6 +1313,7 @@ def __init__(self, observation, modeldata):
            else:
                self.df[self.mod_names[j]] = df[self.mod_names[j]]
 
+        self.df.index.name = "datetime"
        self.df = self.df.dropna()
 
    def _obs_mod_xy_distance_acceptable(self, df_mod, df_obs):
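Naming the index pays off once the table is reset or grouped. A minimal, self-contained pandas illustration (not fmskill code):

    import pandas as pd

    df = pd.DataFrame(
        {"obs": [1.0, 2.0]},
        index=pd.to_datetime(["2021-05-20 00:00", "2021-05-20 01:00"]),
    )
    df.index.name = "datetime"
    print(df.reset_index().columns.tolist())  # ['datetime', 'obs'] instead of ['index', 'obs']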
@@ -1411,6 +1419,7 @@ def _construct_all_df(self):
            res = res.append(df[cols])
 
        self._all_df = res.sort_index()
+        self._all_df.index.name = "datetime"
 
    def __init__(self):
        self.comparers = {}
@@ -1495,7 +1504,7 @@ def mean_skill(
        end: Union[str, datetime] = None,
        area: List[float] = None,
        df: pd.DataFrame = None,
-    ) -> pd.DataFrame:
+    ) -> AggregatedSkill:
        """Weighted mean skill of model(s) over all observations (of same variable)
 
        Parameters
@@ -1527,8 +1536,8 @@ def mean_skill(
 
        Returns
        -------
-        pd.DataFrame
-            mean skill assessment as a dataframe
+        AggregatedSkill
+            mean skill assessment as a skill object
 
        See also
        --------
@@ -1562,8 +1571,8 @@ def mean_skill(
        n_models = len(mod_names)
 
        # skill assessment
-        metrics = self._parse_metric(metrics)
-        skilldf = self.skill(df=df, metrics=metrics)
+        metrics = self._parse_metric(metrics, return_list=True)
+        skilldf = self.skill(df=df, metrics=metrics).df
 
        # weights
        weights = self._parse_weights(weights, obs_names)
@@ -1581,7 +1590,7 @@ def mean_skill(
 
        # output
        res = self._add_as_field_if_not_in_index(df, res, fields=["model", "variable"])
-        return res.astype({"n": int})
+        return AggregatedSkill(res.astype({"n": int}))
 
    def _mean_skill_by(self, skilldf, mod_names, var_names):
        by = []
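Hypothetical usage of the weighted variant, assuming a collection cc with two observations (a weights keyword exists per the _parse_weights call above; the list-of-floats form shown here is an assumption):

    ms = cc.mean_skill(weights=[0.3, 0.7], metrics=["rmse"])  # AggregatedSkill
    print(ms.df)  # one weighted-mean row per model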
@@ -1647,7 +1656,7 @@ def score(
        end: Union[str, datetime] = None,
        area: List[float] = None,
        df: pd.DataFrame = None,
-    ) -> pd.DataFrame:
+    ) -> float:
        """Weighted mean score of model(s) over all observations
 
        NOTE: will take simple mean over different variables
@@ -1702,6 +1711,8 @@ def score(
        8.414442957854142
        """
        metric = self._parse_metric(metric)
+        if not (callable(metric) or isinstance(metric, str)):
+            raise ValueError("metric must be a string or a function")
 
        if model is None:
            models = self._mod_names
@@ -1720,7 +1731,7 @@ def score(
            end=end,
            area=area,
            df=df,
-        )
+        ).df
 
        if n_models == 1:
            score = df[metric.__name__].values.mean()
9 changes: 4 additions & 5 deletions fmskill/metrics.py
@@ -44,7 +44,7 @@
 from typing import Tuple
 import warnings
 import numpy as np
-from scipy.stats import linregress
+from scipy.stats import linregress as _linregress
 import scipy.stats
 from scipy import odr
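The alias is not cosmetic: fmskill discovers valid metric names by introspecting this module (see the _parse_metric change above), so a bare linregress import would be offered as a metric. A sketch of the effect:

    from inspect import getmembers, isfunction
    import fmskill.metrics as mtr

    public = [n for n, _ in getmembers(mtr, isfunction) if not n.startswith("_")]
    print("linregress" in public)   # False: the import is now named _linregress
    print("_linregress" in public)  # False: leading underscore is filtered out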

@@ -203,7 +203,7 @@ def model_efficiency_factor(obs: np.ndarray, model: np.ndarray) -> float:
    See Also
    --------
    nash_sutcliffe_efficiency
-    root_mean_square_error
+    root_mean_squared_error
    """
 
    assert obs.size == model.size
@@ -240,14 +240,13 @@ def corrcoef(obs, model, weights=None) -> float:
 
 
 def rho(obs: np.ndarray, model: np.ndarray) -> float:
-    """alias for Spearman rank correlation coefficient"""
+    """alias for spearmanr"""
    return spearmanr(obs, model)
 
 
 def spearmanr(obs: np.ndarray, model: np.ndarray) -> float:
    """Spearman rank correlation coefficient
-
    The rank correlation coefficient is similar to the Pearson correlation coefficient but
    applied to ranked quantities and is useful to quantify a monotonous relationship
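spearmanr in this module presumably delegates to scipy.stats (imported above); a quick illustration of the rank-based behaviour the docstring describes:

    import numpy as np
    import scipy.stats

    obs = np.array([1.0, 2.0, 3.0, 4.0])
    model = np.array([0.5, 1.0, 4.0, 9.0])  # monotone in obs, but not linear
    rho, _ = scipy.stats.spearmanr(obs, model)
    print(rho)  # 1.0: ranks agree perfectly even though the fit is nonlinear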
@@ -341,7 +340,7 @@ def _linear_regression(
        return np.nan
 
    if reg_method == "ols":
-        reg = linregress(obs, model)
+        reg = _linregress(obs, model)
        intercept = reg.intercept
        slope = reg.slope
    elif reg_method == "odr":
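For completeness, the renamed helper behaves exactly as before inside the OLS branch; a minimal sketch with made-up data:

    import numpy as np
    from scipy.stats import linregress as _linregress

    obs = np.array([1.0, 2.0, 3.0])
    model = np.array([1.1, 2.1, 2.9])
    reg = _linregress(obs, model)
    print(reg.slope, reg.intercept)  # fitted line: model ≈ slope * obs + intercept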