From 332e55f004c535389ae92d1a62410d7226522685 Mon Sep 17 00:00:00 2001 From: Sam Allen <34094291+sallen12@users.noreply.github.com> Date: Mon, 7 Oct 2024 12:02:52 +0200 Subject: [PATCH] Add the log score and ranked logarithmic score (#72) * add ranked probability score * fix lint issues in brier tests * fix lint issues in log score * fix lint issues in rps * change rps observations to be categories * fix bug in torch rps --------- Co-authored-by: Francesco Zanetta --- docs/api/brier.md | 4 ++ scoringrules/__init__.py | 4 ++ scoringrules/_brier.py | 81 ++++++++++++++++++++++++++++++++++++++ scoringrules/core/brier.py | 35 ++++++++++++++++ tests/test_brier.py | 33 ++++++++++++++++ 5 files changed, 157 insertions(+) diff --git a/docs/api/brier.md b/docs/api/brier.md index fe444dd..faad796 100644 --- a/docs/api/brier.md +++ b/docs/api/brier.md @@ -3,3 +3,7 @@ ::: scoringrules.brier_score ::: scoringrules.rps_score + +::: scoringrules.log_score + +::: scoringrules.rls_score diff --git a/scoringrules/__init__.py b/scoringrules/__init__.py index 7441332..7171757 100644 --- a/scoringrules/__init__.py +++ b/scoringrules/__init__.py @@ -3,6 +3,8 @@ from scoringrules._brier import ( brier_score, rps_score, + log_score, + rls_score, ) from scoringrules._crps import ( crps_beta, @@ -153,6 +155,8 @@ "logs_uniform", "brier_score", "rps_score", + "log_score", + "rls_score", "gksuv_ensemble", "gksmv_ensemble", "error_spread_score", diff --git a/scoringrules/_brier.py b/scoringrules/_brier.py index 9a3ec2e..e7152ba 100644 --- a/scoringrules/_brier.py +++ b/scoringrules/_brier.py @@ -88,7 +88,88 @@ def rps_score( return brier.rps_score(obs=observations, fct=forecasts, backend=backend) +def log_score( + observations: "ArrayLike", + forecasts: "ArrayLike", + /, + *, + backend: "Backend" = None, +) -> "Array": + r""" + Compute the Logarithmic Score (LS) for probability forecasts for binary outcomes. 
+ + The LS is formulated as + + $$ LS(f, y) = -\log|f + y - 1|, $$ + + where $f \in [0, 1]$ is the predicted probability of an event and $y \in \{0, 1\}$ the actual outcome. + + Parameters + ---------- + observations: NDArray + Observed outcome, either 0 or 1. + forecasts : NDArray + Forecasted probabilities between 0 and 1. + backend: str + The name of the backend used for computations. Defaults to 'numpy'. + + Returns + ------- + score: + The computed Log Score. + + """ + return brier.log_score(obs=observations, fct=forecasts, backend=backend) + + +def rls_score( + observations: "ArrayLike", + forecasts: "ArrayLike", + /, + axis: int = -1, + *, + backend: "Backend" = None, +) -> "Array": + r""" + Compute the (Discrete) Ranked Logarithmic Score (RLS). + + Suppose the outcome corresponds to one of $K$ ordered categories. The RLS is defined as + + $$ RLS(f, y) = -\sum_{k=1}^{K} \log|\tilde{f}_{k} + \tilde{y}_{k} - 1|, $$ + + where $f \in [0, 1]^{K}$ is a vector of length $K$ containing forecast probabilities + that each of the $K$ categories will occur, and $y \in \{0, 1\}^{K}$ is a vector of + length $K$, with the $k$-th element equal to one if the $k$-th category occurs. We + have $\sum_{k=1}^{K} y_{k} = \sum_{k=1}^{K} f_{k} = 1$, and, for $k = 1, \dots, K$, + $\tilde{y}_{k} = \sum_{i=1}^{k} y_{i}$ and $\tilde{f}_{k} = \sum_{i=1}^{k} f_{i}$. + + Parameters + ---------- + observations: NDArray + Index of the observed category, an integer between 1 and K. + forecasts : NDArray + Forecasted probabilities between 0 and 1. + axis: int + The axis of the forecasts corresponding to the categories. Defaults to -1. + backend: str + The name of the backend used for computations. Defaults to 'numpy'. + + Returns + ------- + score: + The computed Ranked Logarithmic Score. 
+ + """ + B = backends.active if backend is None else backends[backend] + forecasts = B.asarray(forecasts) + + if axis != -1: + forecasts = B.moveaxis(forecasts, axis, -1) + + return brier.rls_score(obs=observations, fct=forecasts, backend=backend) + + __all__ = [ "brier_score", "rps_score", + "log_score", + "rls_score", ] diff --git a/scoringrules/core/brier.py b/scoringrules/core/brier.py index 736bdbc..e66fbae 100644 --- a/scoringrules/core/brier.py +++ b/scoringrules/core/brier.py @@ -42,3 +42,38 @@ def rps_score( return B.sum( (B.cumsum(fct, axis=-1) - B.cumsum(obs_one_hot, axis=-1)) ** 2, axis=-1 ) + + +def log_score(obs: "ArrayLike", fct: "ArrayLike", backend: "Backend" = None) -> "Array": + """Compute the Log Score for predicted probabilities of binary events.""" + B = backends.active if backend is None else backends[backend] + obs, fct = map(B.asarray, (obs, fct)) + + if B.any(fct < 0.0) or B.any(fct > 1.0 + EPSILON): + raise ValueError("Forecasted probabilities must be within 0 and 1.") + + if not set(v.item() for v in B.unique_values(obs)) <= {0, 1}: + raise ValueError("Observations must be 0, 1, or NaN.") + + return B.asarray(-B.log(B.abs(fct + obs - 1.0))) + + +def rls_score( + obs: "ArrayLike", + fct: "ArrayLike", + backend: "Backend" = None, +) -> "Array": + """Compute the Ranked Logarithmic Score for ordinal categorical forecasts.""" + B = backends.active if backend is None else backends[backend] + obs, fct = map(B.asarray, (obs, fct)) + + if B.any(fct < 0.0) or B.any(fct > 1.0 + EPSILON): + raise ValueError("Forecast probabilities must be between 0 and 1.") + + categories = B.arange(1, fct.shape[-1] + 1) + obs_one_hot = B.where(B.expand_dims(obs, -1) == categories, 1, 0) + + return B.sum( + -B.log(B.abs(B.cumsum(fct, axis=-1) + B.cumsum(obs_one_hot, axis=-1) - 1.0)), + axis=-1, + ) diff --git a/tests/test_brier.py b/tests/test_brier.py index dd45920..c265b75 100644 --- a/tests/test_brier.py +++ b/tests/test_brier.py @@ -36,3 +36,36 @@ def 
test_rps(backend): res2 = _brier.rps_score(obs, fct, axis=0, backend=backend) assert np.allclose(res1, res2) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_logs(backend): + # test correctness + obs, fct = 0, 0.481 + res = _brier.log_score(obs, fct, backend=backend) + expected = 0.6558514 + assert np.isclose(res, expected) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_rls(backend): + # test correctness + obs, fct = 2, [0.21, 0.31, 0.32, 0.16] + res = _brier.rls_score(obs, fct, backend=backend) + expected = 1.064002 + assert np.isclose(res, expected) + + # test correctness + obs = [1, 3, 4, 2] + fct = [ + [0.01, 0.32, 0.44, 0.23], + [0.12, 0.05, 0.48, 0.35], + [0.09, 0.21, 0.05, 0.65], + [0.57, 0.31, 0.08, 0.04], + ] + res1 = _brier.rls_score(obs, fct, backend=backend) + + fct = np.transpose(fct) + res2 = _brier.rls_score(obs, fct, axis=0, backend=backend) + + assert np.allclose(res1, res2)