
Commit

Merge pull request #435 from DHI/Update_PR
New Peak Ratio
ecomodeller authored May 8, 2024
2 parents 5f3d000 + 17148c9 commit 72d5183
Showing 3 changed files with 41 additions and 21 deletions.
56 changes: 38 additions & 18 deletions modelskill/metrics.py
@@ -64,6 +64,7 @@
>>> ev(obs, mod)
0.39614855570839064
"""

from __future__ import annotations
import inspect

@@ -531,8 +532,8 @@ def pr(
     obs: pd.Series,
     model: np.ndarray,
     inter_event_level: float = 0.7,
-    AAP: int = 2,
-    inter_event_time="36h",
+    AAP: Union[int, float] = 2,
+    inter_event_time: str ="36h",
 ) -> float:
     """alias for peak_ratio"""
     assert obs.size == model.size
@@ -543,20 +544,20 @@ def peak_ratio(
     obs: pd.Series,
     model: np.ndarray,
     inter_event_level: float = 0.7,
-    AAP: int = 2,
-    inter_event_time="36h",
+    AAP: Union[int, float] = 2,
+    inter_event_time: str="36h",
 ) -> float:
     r"""Peak Ratio
-    PR is the mean of the individual ratios of identified peaks in the
-    model / identified peaks in the measurements. PR is calculated only for the joint-events,
+    PR is the mean of the largest-N individual ratios of identified peaks in the
+    model / identified peaks in the measurements (N number of events defined by AAP). PR is calculated only for the joint-events,
     ie, events that occur simultaneously within a window +/- 0.5*inter_event_time.

     Parameters
     ----------
     inter_event_level (float, optional)
         Inter-event level threshold (default: 0.7).
-    AAP (float, optional)
+    AAP (int or float, optional)
         Average Annual Peaks (ie, Number of peaks per year, on average). (default: 2)
     inter_event_time (str, optional)
         Maximum time interval between peaks (default: 36 hours).
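
For orientation, a minimal usage sketch of the updated metric (not part of this commit; the data is synthetic and the printed values are only illustrative). Note that although the annotation says np.ndarray, the implementation indexes and sorts the model values as a pandas Series aligned with obs:

import numpy as np
import pandas as pd
import modelskill.metrics as mtr

# roughly three years of hourly "observations" with a few injected storm peaks
time = pd.date_range("2019-01-01", "2021-12-31", freq="h")
rng = np.random.default_rng(0)
obs = pd.Series(rng.random(len(time)), index=time)
obs.iloc[[2000, 9000, 16000]] += 5.0
# a model that slightly over-predicts the peaks
mod = obs * 1.05 + rng.normal(0.0, 0.05, len(time))

print(mtr.peak_ratio(obs, mod))                        # defaults: AAP=2, inter_event_time="36h"
print(mtr.pr(obs, mod, AAP=8, inter_event_level=0.2))  # pr is the alias exercised in tests/test_metrics.py
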
@@ -577,24 +578,28 @@ def peak_ratio(
     # Calculate number of years
     dt_int = time[1:].values - time[0:-1].values
     dt_int_mode = float(stats.mode(dt_int, keepdims=False)[0]) / 1e9  # in seconds
-    N_years = dt_int_mode / 24 / 3600 / 365.25 * len(time)
-    found_peaks = []
-    for data in [obs, model]:
-        peak_index, AAP_ = _partial_duration_series(
+    N_years = dt_int_mode / 24 / 3600 / 365.25 * len(time)
+    peak_index, AAP_ = _partial_duration_series(
         time,
-        data,
+        obs,
         inter_event_level=inter_event_level,
         AAP=AAP,
         inter_event_time=inter_event_time,
     )
-        peaks = data[peak_index]
-        peaks_sorted = peaks.sort_values(ascending=False)
-        found_peaks.append(
-            peaks_sorted[0 : max(1, min(round(AAP_ * N_years), np.sum(peaks)))]
-        )
-    found_peaks_obs = found_peaks[0]
-    found_peaks_mod = found_peaks[1]
+    peaks = obs[peak_index]
+    found_peaks_obs = peaks.sort_values(ascending=False)
+
+    peak_index, _ = _partial_duration_series(
+        time,
+        model,
+        inter_event_level=inter_event_level,
+        AAP=AAP,
+        inter_event_time=inter_event_time,
+    )
+    peaks = model[peak_index]
+    found_peaks_mod = peaks.sort_values(ascending=False)
+
+    top_n_peaks = max(1, min(round(AAP_ * N_years), np.sum(peaks)))
     # Resample~ish, find peaks spread maximum Half the inter event time (if inter event =36, select data paired +/- 18h) (or inter_event) and then select
     indices_mod = (
         abs(found_peaks_obs.index.values[:, None] - found_peaks_mod.index.values)
@@ -604,8 +609,23 @@
         abs(found_peaks_mod.index.values[:, None] - found_peaks_obs.index.values)
         < pd.Timedelta(inter_event_time) / 2
     ).any(axis=0)
+    # Find intersection (co-existing peaks, still a large number, O(1000s))
     obs_joint = found_peaks_obs.loc[indices_obs]
     mod_joint = found_peaks_mod.loc[indices_mod]
+    # Now we forget about time index, as peaks have been paired already.
+    df_filter = pd.DataFrame(
+        data={
+            "model": mod_joint.sort_index().values,
+            "observation": obs_joint.sort_index().values,
+        }
+    )
+    df_filter["Maximum"] = df_filter.max(axis=1)
+    df_filter.sort_values(by="Maximum", ascending=False, inplace=True)
+    # Finally we do the selection of the N- largest peaks from either model or measured
+    df_filter = df_filter.iloc[0:top_n_peaks, :]
+    # Rename to avoid further refactoring
+    obs_joint = df_filter.loc[:, "observation"]
+    mod_joint = df_filter.loc[:, "model"]
+
     if len(obs_joint) == 0 or len(mod_joint) == 0:
         return np.nan
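
In summary, the rewritten metric identifies peaks separately for observation and model, pairs peaks that occur within half the inter-event window of each other, ranks the pairs by the larger of the two values, and keeps only the top_n_peaks pairs (roughly AAP peaks per year of data, capped by the number of peaks found). Below is a standalone sketch of that pairing/selection step; it mirrors the diff but is simplified, top_n_joint_peak_ratio is a hypothetical name rather than a modelskill function, and the final mean-of-ratios line is an assumption based on the docstring (the actual return statement is outside the shown hunks):

import numpy as np
import pandas as pd

def top_n_joint_peak_ratio(
    found_peaks_obs: pd.Series,   # observed peaks, DatetimeIndex, sorted descending by value
    found_peaks_mod: pd.Series,   # modelled peaks, DatetimeIndex, sorted descending by value
    top_n_peaks: int,
    inter_event_time: str = "36h",
) -> float:
    # keep only peaks that co-occur within +/- half the inter-event time
    half_window = pd.Timedelta(inter_event_time) / 2
    indices_mod = (
        abs(found_peaks_obs.index.values[:, None] - found_peaks_mod.index.values) < half_window
    ).any(axis=0)
    indices_obs = (
        abs(found_peaks_mod.index.values[:, None] - found_peaks_obs.index.values) < half_window
    ).any(axis=0)
    obs_joint = found_peaks_obs.loc[indices_obs]
    mod_joint = found_peaks_mod.loc[indices_mod]
    if len(obs_joint) == 0 or len(mod_joint) == 0:
        return np.nan
    # pair by time order (assumes the window yields equally many obs and model peaks, as the library code does),
    # rank each pair by the larger of the two peaks and keep the N largest pairs
    df = pd.DataFrame(
        {
            "model": mod_joint.sort_index().values,
            "observation": obs_joint.sort_index().values,
        }
    )
    df["Maximum"] = df.max(axis=1)
    df = df.sort_values(by="Maximum", ascending=False).iloc[:top_n_peaks]
    # mean of model/observation ratios over the selected pairs (per the docstring)
    return float(np.mean(df["model"].values / df["observation"].values))

# tiny example: two of the three peaks co-occur within +/- 18 h
peaks_obs = pd.Series([3.0, 2.5, 2.0], index=pd.to_datetime(["2021-01-05", "2021-02-10", "2021-03-01"]))
peaks_mod = pd.Series([3.3, 2.4, 1.0], index=pd.to_datetime(["2021-01-05 06:00", "2021-02-10 12:00", "2021-06-01"]))
print(top_n_joint_peak_ratio(peaks_obs, peaks_mod, top_n_peaks=2))  # mean of 3.3/3.0 and 2.4/2.5
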
2 changes: 1 addition & 1 deletion tests/test_comparercollection.py
@@ -568,7 +568,7 @@ def test_peak_ratio(cc):
 def test_peak_ratio_2(cc_pr):
     sk = cc_pr.skill(metrics=["peak_ratio"])
     assert "peak_ratio" in sk.data.columns
-    assert sk.to_dataframe()["peak_ratio"].values == pytest.approx(1.0799999095653732)
+    assert sk.to_dataframe()["peak_ratio"].values == pytest.approx(0.88999995)


 def test_copy(cc):
4 changes: 2 additions & 2 deletions tests/test_metrics.py
@@ -201,7 +201,7 @@ def test_pr(obs_series: pd.Series, mod_series: pd.Series) -> None:

     pr = mtr.pr(obs, mod)

-    assert pr == pytest.approx(1.0799999095653732)
+    assert pr == pytest.approx(0.889999947851914)


 def test_pr_2(obs_series, mod_series):
@@ -212,7 +212,7 @@ def test_pr_2(obs_series, mod_series):

     pr = mtr.pr(obs, mod, AAP=8, inter_event_level=0.2)

-    assert pr == pytest.approx(1.0949999434255118)
+    assert pr == pytest.approx(0.947499960537655)


 def test_metric_has_dimension():
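
To reproduce the updated reference values locally, the touched tests can be run selectively with standard pytest node-id syntax (not part of this commit), e.g.: python -m pytest tests/test_metrics.py::test_pr tests/test_metrics.py::test_pr_2 tests/test_comparercollection.py::test_peak_ratio_2
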

