Add log score for ensemble forecasts (#74)

* fix lint issues in crps for mixture of normals * add log score for mixture of normals * add log score for ensemble forecasts * fix bug in log score for ensembles in torch * fix bug in log score for ensembles in torch
frazane · Oct 3, 2024 · 7964e7e · 7964e7e
1 parent 49ee8de
commit 7964e7e
Show file tree

Hide file tree

Showing 8 changed files with 197 additions and 0 deletions.
diff --git a/docs/api/logarithmic.md b/docs/api/logarithmic.md
@@ -4,6 +4,8 @@
 
 ::: scoringrules.logs_binomial
 
+::: scoringrules.logs_ensemble
+
 ::: scoringrules.logs_exponential
 
 ::: scoringrules.logs_exponential2

diff --git a/scoringrules/__init__.py b/scoringrules/__init__.py
@@ -49,6 +49,7 @@
 from scoringrules._logs import (
     logs_beta,
     logs_binomial,
+    logs_ensemble,
     logs_exponential,
     logs_exponential2,
     logs_2pexponential,
@@ -121,6 +122,7 @@
     "vrcrps_ensemble",
     "logs_beta",
     "logs_binomial",
+    "logs_ensemble",
     "logs_exponential",
     "logs_exponential2",
     "logs_2pexponential",

diff --git a/scoringrules/_logs.py b/scoringrules/_logs.py
@@ -7,6 +7,71 @@
     from scoringrules.core.typing import Array, ArrayLike, Backend
 
 
+def logs_ensemble(
+    observations: "ArrayLike",
+    forecasts: "Array",
+    /,
+    axis: int = -1,
+    *,
+    bw: "ArrayLike" = None,
+    backend: "Backend" = None,
+) -> "Array":
+    r"""Estimate the Logarithmic score for a finite ensemble via kernel density estimation.
+
+    Gaussian kernel density estimation is used to convert the finite ensemble to a
+    mixture of normal distributions, with the component distributions centred at each
+    ensemble member, with scale equal to the bandwidth parameter 'bw'.
+
+    The log score for the ensemble forecast is then the log score for the mixture of
+    normal distributions.
+
+    Parameters
+    ----------
+    observations: ArrayLike
+        The observed values.
+    forecasts: ArrayLike
+        The predicted forecast ensemble, where the ensemble dimension is by default
+        represented by the last axis.
+    axis: int
+        The axis corresponding to the ensemble. Default is the last axis.
+    bw : ArrayLike
+        The bandwidth parameter for each forecast ensemble. If not given, estimated using
+        Silverman's rule of thumb.
+    backend: str
+        The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'.
+
+    Returns
+    -------
+    score:
+        The LS between the forecast ensemble and obs.
+
+    Examples
+    --------
+    >>> import scoringrules as sr
+    >>> sr.logs_ensemble(obs, pred)
+    """
+    B = backends.active if backend is None else backends[backend]
+    observations, forecasts = map(B.asarray, (observations, forecasts))
+
+    if axis != -1:
+        forecasts = B.moveaxis(forecasts, axis, -1)
+
+    M = forecasts.shape[-1]
+
+    # Silverman's rule of thumb for estimating the bandwidth parameter
+    if bw is None:
+        sigmahat = B.std(forecasts, axis=-1)
+        q75 = B.quantile(forecasts, 0.75, axis=-1)
+        q25 = B.quantile(forecasts, 0.25, axis=-1)
+        iqr = q75 - q25
+        bw = 1.06 * B.minimum(sigmahat, iqr / 1.34) * (M ** (-1 / 5))
+    bw = B.stack([bw] * M, axis=-1)
+
+    w = B.zeros(forecasts.shape) + 1 / M
+
+    return logarithmic.mixnorm(observations, forecasts, bw, w, backend=backend)
+
+
 def logs_beta(
     observation: "ArrayLike",
     a: "ArrayLike",

diff --git a/scoringrules/backend/jax.py b/scoringrules/backend/jax.py
@@ -47,6 +47,27 @@ def mean(
     ) -> "Array":
         return jnp.mean(x, axis=axis, keepdims=keepdims)
 
+    def std(
+        self,
+        x: "Array",
+        /,
+        *,
+        axis: int | tuple[int, ...] | None = None,
+        keepdims: bool = False,
+    ) -> "Array":
+        return jnp.std(x, ddof=1, axis=axis, keepdims=keepdims)
+
+    def quantile(
+        self,
+        x: "Array",
+        q: "ArrayLike",
+        /,
+        *,
+        axis: int | tuple[int, ...] | None = None,
+        keepdims: bool = False,
+    ) -> "Array":
+        return jnp.quantile(x, q, axis=axis, keepdims=keepdims)
+
     def max(
         self, x: "Array", axis: int | tuple[int, ...] | None, keepdims: bool = False
     ) -> "Array":

diff --git a/scoringrules/backend/numpy.py b/scoringrules/backend/numpy.py
@@ -47,6 +47,27 @@ def mean(
     ) -> "NDArray":
         return np.mean(x, axis=axis, keepdims=keepdims)
 
+    def std(
+        self,
+        x: "NDArray",
+        /,
+        *,
+        axis: int | tuple[int, ...] | None = None,
+        keepdims: bool = False,
+    ) -> "NDArray":
+        return np.std(x, ddof=1, axis=axis, keepdims=keepdims)
+
+    def quantile(
+        self,
+        x: "NDArray",
+        q: "ArrayLike",
+        /,
+        *,
+        axis: int | tuple[int, ...] | None = None,
+        keepdims: bool = False,
+    ) -> "NDArray":
+        return np.quantile(x, q, axis=axis, keepdims=keepdims)
+
     def max(
         self, x: "NDArray", axis: int | tuple[int, ...] | None, keepdims: bool = False
     ) -> "NDArray":

diff --git a/scoringrules/backend/tensorflow.py b/scoringrules/backend/tensorflow.py
@@ -52,6 +52,29 @@ def mean(
     ) -> "Tensor":
         return tf.math.reduce_mean(x, axis=axis, keepdims=keepdims)
 
+    def std(
+        self,
+        x: "Tensor",
+        /,
+        *,
+        axis: int | tuple[int, ...] | None = None,
+        keepdims: bool = False,
+    ) -> "Tensor":
+        n = x.shape.num_elements() if axis is None else x.shape[axis]
+        resc = self.sqrt(n / (n - 1))
+        return tf.math.reduce_std(x, axis=axis, keepdims=keepdims) * resc
+
+    def quantile(
+        self,
+        x: "Tensor",
+        q: "TensorLike",
+        /,
+        *,
+        axis: int | tuple[int, ...] | None = None,
+        keepdims: bool = False,
+    ) -> "Tensor":
+        raise NotImplementedError
+
     def max(
         self, x: "Tensor", axis: int | tuple[int, ...] | None, keepdims: bool = False
     ) -> "Tensor":
@@ -76,6 +99,14 @@ def sum(
     ) -> "Tensor":
         return tf.math.reduce_sum(x, axis=axis, keepdims=keepdims)
 
+    def cumsum(
+        self,
+        x: "Tensor",
+        /,
+        axis: int | tuple[int, ...] | None = None,
+    ) -> "Tensor":
+        return tf.math.cumsum(x, axis=axis)
+
     def unique_values(self, x: "Tensor", /) -> "Tensor":
         return tf.unique(x)
 
@@ -264,6 +295,12 @@ def where(self, condition: "Tensor", x1: "Tensor", x2: "Tensor") -> "Tensor":
     def size(self, x: "Tensor") -> int:
         return x.shape.num_elements()
 
+    def indices(self, dimensions: tuple) -> "Tensor":
+        ranges = [self.arange(s) for s in dimensions]
+        index_grids = tf.meshgrid(*ranges, indexing="ij")
+        indices = tf.stack(index_grids)
+        return indices
+
 
 if __name__ == "__main__":
     B = TensorflowBackend()

diff --git a/scoringrules/backend/torch.py b/scoringrules/backend/torch.py
@@ -47,6 +47,27 @@ def mean(
     ) -> "Tensor":
         return torch.mean(x, axis=axis, keepdim=keepdims)
 
+    def std(
+        self,
+        x: "Tensor",
+        /,
+        *,
+        axis: int | tuple[int, ...] | None = None,
+        keepdims: bool = False,
+    ) -> "Tensor":
+        return torch.std(x, correction=1, axis=axis, keepdim=keepdims)
+
+    def quantile(
+        self,
+        x: "Tensor",
+        q: "TensorLike",
+        /,
+        *,
+        axis: int | tuple[int, ...] | None = None,
+        keepdims: bool = False,
+    ) -> "Tensor":
+        return torch.quantile(x, q, dim=axis, keepdim=keepdims)
+
     def max(
         self, x: "Tensor", axis: int | tuple[int, ...] | None, keepdims: bool = False
     ) -> "Tensor":

diff --git a/tests/test_logs.py b/tests/test_logs.py
@@ -4,6 +4,34 @@
 
 from .conftest import BACKENDS
 
+ENSEMBLE_SIZE = 51
+N = 100
+
+
+@pytest.mark.parametrize("backend", BACKENDS)
+def test_ensemble(backend):
+    obs = np.random.randn(N)
+    mu = obs + np.random.randn(N) * 0.1
+    sigma = abs(np.random.randn(N))
+    fct = np.random.randn(N, ENSEMBLE_SIZE) * sigma[..., None] + mu[..., None]
+
+    res = _logs.logs_ensemble(obs, fct, axis=-1, backend=backend)
+    assert res.shape == (N,)
+
+    fct = fct.T
+    res0 = _logs.logs_ensemble(obs, fct, axis=0, backend=backend)
+    assert np.allclose(res, res0)
+
+    obs, fct = 6.2, [4.2, 5.1, 6.1, 7.6, 8.3, 9.5]
+    res = _logs.logs_ensemble(obs, fct, backend=backend)
+    expected = 1.926475
+    assert np.isclose(res, expected)
+
+    fct = [-142.7, -160.3, -179.4, -184.5]
+    res = _logs.logs_ensemble(obs, fct, backend=backend)
+    expected = 55.25547
+    assert np.isclose(res, expected)
+
 
 @pytest.mark.parametrize("backend", BACKENDS)
 def test_beta(backend):