From 332e55f004c535389ae92d1a62410d7226522685 Mon Sep 17 00:00:00 2001 From: Sam Allen <34094291+sallen12@users.noreply.github.com> Date: Mon, 7 Oct 2024 12:02:52 +0200 Subject: [PATCH] Add the log score and ranked logarithmic score (#72) * add ranked probability score * fix lint issues in brier tests * fix lint issues in log score * fix lint issues in rps * change rps observations to be categories * fix bug in torch rps --------- Co-authored-by: Francesco Zanetta --- docs/api/brier.md | 4 ++ scoringrules/__init__.py | 4 ++ scoringrules/_brier.py | 81 ++++++++++++++++++++++++++++++++++++++ scoringrules/core/brier.py | 35 ++++++++++++++++ tests/test_brier.py | 33 ++++++++++++++++ 5 files changed, 157 insertions(+) diff --git a/docs/api/brier.md b/docs/api/brier.md index fe444dd..faad796 100644 --- a/docs/api/brier.md +++ b/docs/api/brier.md @@ -3,3 +3,7 @@ ::: scoringrules.brier_score ::: scoringrules.rps_score + +::: scoringrules.log_score + +::: scoringrules.rls_score diff --git a/scoringrules/__init__.py b/scoringrules/__init__.py index 7441332..7171757 100644 --- a/scoringrules/__init__.py +++ b/scoringrules/__init__.py @@ -3,6 +3,8 @@ from scoringrules._brier import ( brier_score, rps_score, + log_score, + rls_score, ) from scoringrules._crps import ( crps_beta, @@ -153,6 +155,8 @@ "logs_uniform", "brier_score", "rps_score", + "log_score", + "rls_score", "gksuv_ensemble", "gksmv_ensemble", "error_spread_score", diff --git a/scoringrules/_brier.py b/scoringrules/_brier.py index 9a3ec2e..e7152ba 100644 --- a/scoringrules/_brier.py +++ b/scoringrules/_brier.py @@ -88,7 +88,88 @@ def rps_score( return brier.rps_score(obs=observations, fct=forecasts, backend=backend) +def log_score( + observations: "ArrayLike", + forecasts: "ArrayLike", + /, + *, + backend: "Backend" = None, +) -> "Array": + r""" + Compute the Logarithmic Score (LS) for probability forecasts for binary outcomes. 
+ + The LS is formulated as + + $$ LS(f, y) = -\log|f + y - 1|, $$ + + where $f \in [0, 1]$ is the predicted probability of an event and $y \in \{0, 1\}$ the actual outcome. + + Parameters + ---------- + observations: NDArray + Observed outcome, either 0 or 1. + forecasts : NDArray + Forecasted probabilities between 0 and 1. + backend: str + The name of the backend used for computations. Defaults to 'numpy'. + + Returns + ------- + score: + The computed Log Score. + + """ + return brier.log_score(obs=observations, fct=forecasts, backend=backend) + + +def rls_score( + observations: "ArrayLike", + forecasts: "ArrayLike", + /, + axis: int = -1, + *, + backend: "Backend" = None, +) -> "Array": + r""" + Compute the (Discrete) Ranked Logarithmic Score (RLS). + + Suppose the outcome corresponds to one of $K$ ordered categories. The RLS is defined as + + $$ RLS(f, y) = -\sum_{k=1}^{K} \log|\tilde{f}_{k} + \tilde{y}_{k} - 1|, $$ + + where $f \in [0, 1]^{K}$ is a vector of length $K$ containing forecast probabilities + that each of the $K$ categories will occur, and $y \in \{0, 1\}^{K}$ is a vector of + length $K$, with the $k$-th element equal to one if the $k$-th category occurs. We + have $\sum_{k=1}^{K} y_{k} = \sum_{k=1}^{K} f_{k} = 1$, and, for $k = 1, \dots, K$, + $\tilde{y}_{k} = \sum_{i=1}^{k} y_{i}$ and $\tilde{f}_{k} = \sum_{i=1}^{k} f_{i}$. + + Parameters + ---------- + observations: NDArray + Index of the observed category, an integer between 1 and K. + forecasts : NDArray + Forecasted probabilities between 0 and 1. + axis: int + The axis of the forecasts corresponding to the categories. Defaults to -1. + backend: str + The name of the backend used for computations. Defaults to 'numpy'. + + Returns + ------- + score: + The computed Ranked Logarithmic Score. 
+ + """ + B = backends.active if backend is None else backends[backend] + forecasts = B.asarray(forecasts) + + if axis != -1: + forecasts = B.moveaxis(forecasts, axis, -1) + + return brier.rls_score(obs=observations, fct=forecasts, backend=backend) + + __all__ = [ "brier_score", "rps_score", + "log_score", + "rls_score", ] diff --git a/scoringrules/core/brier.py b/scoringrules/core/brier.py index 736bdbc..e66fbae 100644 --- a/scoringrules/core/brier.py +++ b/scoringrules/core/brier.py @@ -42,3 +42,38 @@ def rps_score( return B.sum( (B.cumsum(fct, axis=-1) - B.cumsum(obs_one_hot, axis=-1)) ** 2, axis=-1 ) + + +def log_score(obs: "ArrayLike", fct: "ArrayLike", backend: "Backend" = None) -> "Array": + """Compute the Log Score for predicted probabilities of binary events.""" + B = backends.active if backend is None else backends[backend] + obs, fct = map(B.asarray, (obs, fct)) + + if B.any(fct < 0.0) or B.any(fct > 1.0 + EPSILON): + raise ValueError("Forecasted probabilities must be within 0 and 1.") + + if not set(v.item() for v in B.unique_values(obs)) <= {0, 1}: + raise ValueError("Observations must be 0, 1, or NaN.") + + return B.asarray(-B.log(B.abs(fct + obs - 1.0))) + + +def rls_score( + obs: "ArrayLike", + fct: "ArrayLike", + backend: "Backend" = None, +) -> "Array": + """Compute the Ranked Logarithmic Score for ordinal categorical forecasts.""" + B = backends.active if backend is None else backends[backend] + obs, fct = map(B.asarray, (obs, fct)) + + if B.any(fct < 0.0) or B.any(fct > 1.0 + EPSILON): + raise ValueError("Forecast probabilities must be between 0 and 1.") + + categories = B.arange(1, fct.shape[-1] + 1) + obs_one_hot = B.where(B.expand_dims(obs, -1) == categories, 1, 0) + + return B.sum( + -B.log(B.abs(B.cumsum(fct, axis=-1) + B.cumsum(obs_one_hot, axis=-1) - 1.0)), + axis=-1, + ) diff --git a/tests/test_brier.py b/tests/test_brier.py index dd45920..c265b75 100644 --- a/tests/test_brier.py +++ b/tests/test_brier.py @@ -36,3 +36,36 @@ def 
test_rps(backend): res2 = _brier.rps_score(obs, fct, axis=0, backend=backend) assert np.allclose(res1, res2) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_logs(backend): + # test correctness + obs, fct = 0, 0.481 + res = _brier.log_score(obs, fct, backend=backend) + expected = 0.6558514 + assert np.isclose(res, expected) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_rls(backend): + # test correctness + obs, fct = 2, [0.21, 0.31, 0.32, 0.16] + res = _brier.rls_score(obs, fct, backend=backend) + expected = 1.064002 + assert np.isclose(res, expected) + + # test correctness + obs = [1, 3, 4, 2] + fct = [ + [0.01, 0.32, 0.44, 0.23], + [0.12, 0.05, 0.48, 0.35], + [0.09, 0.21, 0.05, 0.65], + [0.57, 0.31, 0.08, 0.04], + ] + res1 = _brier.rls_score(obs, fct, backend=backend) + + fct = np.transpose(fct) + res2 = _brier.rls_score(obs, fct, axis=0, backend=backend) + + assert np.allclose(res1, res2)