From 68465200e348dd3c99a37e22a2ceee9996796e9a Mon Sep 17 00:00:00 2001 From: AS Date: Sat, 4 May 2019 11:49:16 +0200 Subject: [PATCH 1/7] include xr-ed versions of properscorings crps_gaussian, crps_ensemble --- README.rst | 9 ++-- ci/requirements-py36.yml | 2 + setup.py | 3 +- xskillscore/__init__.py | 3 +- xskillscore/core/probabilistic.py | 39 +++++++++++++++++ xskillscore/tests/test_probabilistic.py | 56 +++++++++++++++++++++++++ 6 files changed, 104 insertions(+), 8 deletions(-) create mode 100644 xskillscore/core/probabilistic.py create mode 100644 xskillscore/tests/test_probabilistic.py diff --git a/README.rst b/README.rst index 81e80385..04197c19 100644 --- a/README.rst +++ b/README.rst @@ -4,7 +4,7 @@ xskillscore: Verification of forecasts .. image:: https://travis-ci.org/raybellwaves/xskillscore.svg?branch=master :target: https://travis-ci.org/raybellwaves/xskillscore -**xskillscore** is an open source project and Python package that provides verification metrics of deterministic (and probabilistic) forecasts with xarray. +**xskillscore** is an open source project and Python package that provides verification metrics of deterministic (and probabilistic from `properscoring`) forecasts with xarray. Installing ---------- @@ -27,9 +27,9 @@ Examples dims=['time', 'lat', 'lon']) fct = xr.DataArray(np.random.rand(3, 4, 5), coords=[pd.date_range('1/1/2000', '1/3/2000', freq='D'), - np.arange(4), np.arange(5)], + np.arange(4), np.arange(5)], dims=['time', 'lat', 'lon']) - + r = xs.pearson_r(obs, fct, 'time') #>>> r # @@ -40,7 +40,7 @@ Examples #Coordinates: # * lat (lat) int64 0 1 2 3 # * lon (lon) int64 0 1 2 3 4 - + rmse = xs.rmse(obs, fct, 'time') #>>> rmse # @@ -51,4 +51,3 @@ Examples #Coordinates: # * lat (lat) int64 0 1 2 3 # * lon (lon) int64 0 1 2 3 4 - diff --git a/ci/requirements-py36.yml b/ci/requirements-py36.yml index 9deaaefe..b8acbe89 100644 --- a/ci/requirements-py36.yml +++ b/ci/requirements-py36.yml @@ -8,3 +8,5 @@ dependencies: - dask - scipy - pytest + - pip: + - properscoring diff --git a/setup.py b/setup.py index c74ee83f..77da37cd 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,5 @@ from setuptools import find_packages, setup - DISTNAME = 'xskillscore' VERSION = '0.0.2' LICENSE = 'Apache' @@ -8,7 +7,7 @@ AUTHOR_EMAIL = 'rjbell1987@gmail.com' DESCRIPTION = "xskillscore" URL = 'https://github.com/raybellwaves/xskillscore' -INSTALL_REQUIRES = ['scikit-learn', 'xarray', 'dask', 'scipy'] +INSTALL_REQUIRES = ['scikit-learn', 'xarray', 'dask', 'scipy','properscoring'] TESTS_REQUIRE = ['pytest'] PYTHON_REQUIRE = '>=3.6' diff --git a/xskillscore/__init__.py b/xskillscore/__init__.py index 471b95b6..0aa1b57c 100644 --- a/xskillscore/__init__.py +++ b/xskillscore/__init__.py @@ -1,2 +1,3 @@ # flake8: noqa -from .core.deterministic import pearson_r, pearson_r_p_value, rmse, mse, mae +from .core.deterministic import mae, mse, pearson_r, pearson_r_p_value, rmse +from .core.probabilistic import xr_crps_ensemble, xr_crps_gaussian diff --git a/xskillscore/core/probabilistic.py b/xskillscore/core/probabilistic.py new file mode 100644 index 00000000..a1409bbe --- /dev/null +++ b/xskillscore/core/probabilistic.py @@ -0,0 +1,39 @@ +import xarray as xr +from properscoring import crps_ensemble, crps_gaussian + + +def xr_crps_gaussian(observations, mu, sig): + """ + + + See Also + -------- + properscoring.crps_gaussian + xarray.apply_ufunc + """ + # check if same dimensions + if mu.dims != observations.dims: + observations, mu = xr.broadcast(observations, mu) + if sig.dims != observations.dims: + observations, sig = xr.broadcast(observations, sig) + return xr.apply_ufunc(crps_gaussian, observations, mu, sig, + input_core_dims=[[], [], []], + dask='parallelized', + output_dtypes=[float]) + + +def xr_crps_ensemble(observations, forecasts): + """ + + + See Also + -------- + properscoring.crps_ensemble + xarray.apply_ufunc + """ + if forecasts.dims != observations.dims: + observations, forecasts = xr.broadcast(observations, forecasts) + return xr.apply_ufunc(crps_ensemble, observations, forecasts, + input_core_dims=[[], []], + dask='parallelized', + output_dtypes=[float]) diff --git a/xskillscore/tests/test_probabilistic.py b/xskillscore/tests/test_probabilistic.py new file mode 100644 index 00000000..9e6cd045 --- /dev/null +++ b/xskillscore/tests/test_probabilistic.py @@ -0,0 +1,56 @@ +import numpy as np +import pandas as pd +import pytest +import xarray as xr +from properscoring import crps_ensemble, crps_gaussian +from xarray.tests import assert_allclose, assert_identical + +from xskillscore.core.probabilistic import xr_crps_ensemble, xr_crps_gaussian + + +@pytest.fixture +def a_dask(): + dates = pd.date_range('1/1/2000', '1/3/2000', freq='D') + lats = np.arange(4) + lons = np.arange(5) + data = np.random.rand(len(dates), len(lats), len(lons)) + return xr.DataArray(data, + coords=[dates, lats, lons], + dims=['time', 'lat', 'lon']).chunk() + + +@pytest.fixture +def b_dask(): + dates = pd.date_range('1/1/2000', '1/3/2000', freq='D') + lats = np.arange(4) + lons = np.arange(5) + data = np.random.rand(len(dates), len(lats), len(lons)) + return xr.DataArray(data, + coords=[dates, lats, lons], + dims=['time', 'lat', 'lon']).chunk() + + +def test_xr_crps_ensemble_dask(a_dask, b_dask): + actual = xr_crps_ensemble(a_dask, b_dask) + expected = crps_ensemble(a_dask, b_dask) + expected = xr.DataArray(expected, coords=a_dask.coords) + # test for numerical identity of xr_crps and crps + assert_identical(actual, expected) + # test that xr_crps_ensemble returns chunks + assert actual.chunks is not None + # show that crps_ensemble returns no chunks + assert expected.chunks is None + + +def test_xr_crps_gaussian_dask(a_dask, b_dask): + mu = b_dask.mean('time') + sig = b_dask.std('time') + actual = xr_crps_gaussian(a_dask, mu, sig) + expected = crps_gaussian(a_dask, mu, sig) + expected = xr.DataArray(expected, coords=a_dask.coords) + # test for numerical identity of xr_crps and crps + assert_identical(actual, expected) + # test that xr_crps_ensemble returns chunks + assert actual.chunks is not None + # show that crps_ensemble returns no chunks + assert expected.chunks is None From e452f8442043607bcfb6d9c3ea3244c1d4fbbeef Mon Sep 17 00:00:00 2001 From: AS Date: Sat, 4 May 2019 12:32:49 +0200 Subject: [PATCH 2/7] add example --- README.rst | 11 +++++++++-- xskillscore/__init__.py | 3 ++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 870fff9b..9bef54a7 100644 --- a/README.rst +++ b/README.rst @@ -35,6 +35,7 @@ Examples np.arange(4), np.arange(5)], dims=['time', 'lat', 'lon']) + # deterministic r = xs.pearson_r(obs, fct, 'time') #>>> r # @@ -45,11 +46,17 @@ Examples #Coordinates: # * lat (lat) int64 0 1 2 3 # * lon (lon) int64 0 1 2 3 4 - + r_p_value = xs.pearson_r_p_value(obs, fct, 'time') rmse = xs.rmse(obs, fct, 'time') mse = xs.mse(obs, fct, 'time') - mae = xs.mae(obs, fct, 'time') + mae = xs.mae(obs, fct, 'time') + + + # probabilistic + crps_ensemble = xs.crps_ensemble(obs, fct) + + crps_gaussian = xs.crps_gaussian(obs, fct.mean('time'), fct.std('time')) diff --git a/xskillscore/__init__.py b/xskillscore/__init__.py index 0aa1b57c..5c85b5a4 100644 --- a/xskillscore/__init__.py +++ b/xskillscore/__init__.py @@ -1,3 +1,4 @@ # flake8: noqa from .core.deterministic import mae, mse, pearson_r, pearson_r_p_value, rmse -from .core.probabilistic import xr_crps_ensemble, xr_crps_gaussian +from .core.probabilistic import xr_crps_ensemble as crps_ensemble +from .core.probabilistic import xr_crps_gaussian as crps_gaussian From b9e661bda07c972275ea0164601db18a6c6d43a9 Mon Sep 17 00:00:00 2001 From: AS Date: Wed, 8 May 2019 20:50:23 +0200 Subject: [PATCH 3/7] added doc strings --- xskillscore/core/probabilistic.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/xskillscore/core/probabilistic.py b/xskillscore/core/probabilistic.py index a1409bbe..9f30ea53 100644 --- a/xskillscore/core/probabilistic.py +++ b/xskillscore/core/probabilistic.py @@ -4,7 +4,21 @@ def xr_crps_gaussian(observations, mu, sig): """ + xarray version of properscoring.crps_gaussian. + Parameters + ---------- + observations : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or + scalars, Mix of labeled and/or unlabeled observations arrays. + mu : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or + scalars, Mix of labeled and/or unlabeled forecasts mean arrays. + sig : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or + scalars, Mix of labeled and/or unlabeled forecasts mean arrays. + + Returns + ------- + Single value or tuple of Dataset, DataArray, Variable, dask.array.Array or + numpy.ndarray, the first type on that list to appear on an input. See Also -------- @@ -24,7 +38,19 @@ def xr_crps_gaussian(observations, mu, sig): def xr_crps_ensemble(observations, forecasts): """ + xarray version of properscoring.crps_ensemble. + + Parameters + ---------- + observations : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or + scalars, Mix of labeled and/or unlabeled observations arrays. + forecasts : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or + scalars, Mix of labeled and/or unlabeled forecasts arrays. + Returns + ------- + Single value or tuple of Dataset, DataArray, Variable, dask.array.Array or + numpy.ndarray, the first type on that list to appear on an input. See Also -------- From 616aaf455aa6a1a7a1139e2619823ac2159eb907 Mon Sep 17 00:00:00 2001 From: AS Date: Wed, 8 May 2019 22:37:35 +0200 Subject: [PATCH 4/7] add threshold_brier_score --- README.rst | 4 +- xskillscore/__init__.py | 2 + xskillscore/core/probabilistic.py | 51 ++++++++++++++++++++++++- xskillscore/tests/test_probabilistic.py | 19 ++++++++- 4 files changed, 72 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 9bef54a7..6c573a3e 100644 --- a/README.rst +++ b/README.rst @@ -4,7 +4,7 @@ xskillscore: Metrics for verifying forecasts .. image:: https://travis-ci.org/raybellwaves/xskillscore.svg?branch=master :target: https://travis-ci.org/raybellwaves/xskillscore -**xskillscore** is an open source project and Python package that provides verification metrics of deterministic (and probabilistic from `properscoring`) forecasts with xarray. +**xskillscore** is an open source project and Python package that provides verification metrics of deterministic (and probabilistic from `properscoring`) forecasts with `xarray`. Installing ---------- @@ -60,3 +60,5 @@ Examples crps_ensemble = xs.crps_ensemble(obs, fct) crps_gaussian = xs.crps_gaussian(obs, fct.mean('time'), fct.std('time')) + + threshold_brier_score = xs.threshold_brier_score(obs, fct, 0.7) diff --git a/xskillscore/__init__.py b/xskillscore/__init__.py index 5c85b5a4..359128fe 100644 --- a/xskillscore/__init__.py +++ b/xskillscore/__init__.py @@ -2,3 +2,5 @@ from .core.deterministic import mae, mse, pearson_r, pearson_r_p_value, rmse from .core.probabilistic import xr_crps_ensemble as crps_ensemble from .core.probabilistic import xr_crps_gaussian as crps_gaussian +from .core.probabilistic import \ + xr_threshold_brier_score as threshold_brier_score diff --git a/xskillscore/core/probabilistic.py b/xskillscore/core/probabilistic.py index 9f30ea53..911139bf 100644 --- a/xskillscore/core/probabilistic.py +++ b/xskillscore/core/probabilistic.py @@ -1,5 +1,5 @@ import xarray as xr -from properscoring import crps_ensemble, crps_gaussian +from properscoring import crps_ensemble, crps_gaussian, threshold_brier_score def xr_crps_gaussian(observations, mu, sig): @@ -63,3 +63,52 @@ def xr_crps_ensemble(observations, forecasts): input_core_dims=[[], []], dask='parallelized', output_dtypes=[float]) + + +def xr_threshold_brier_score(observations, forecasts, threshold, issorted=False, axis=-1): + """ + xarray version of properscoring.threshold_brier_score: Calculate the Brier scores of an ensemble for exceeding given thresholds. + + Parameters + ---------- + observations : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or + scalars, Mix of labeled and/or unlabeled observations arrays. + forecasts : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or + scalars, Mix of labeled and/or unlabeled forecasts arrays. + threshold : scalar (not yet implemented: or 1d scalar threshold value(s) at + which to calculate) exceedence Brier scores. + issorted : bool, optional + Optimization flag to indicate that the elements of `ensemble` are + already sorted along `axis`. + axis : int, optional + Axis in forecasts which corresponds to different ensemble members, + along which to calculate the threshold decomposition. + + + Returns + ------- + Single value or tuple of Dataset, DataArray, Variable, dask.array.Array or + numpy.ndarray, the first type on that list to appear on an input. (If + ``threshold`` is a scalar, the result will have the same shape as + observations. Otherwise, it will have an additional final dimension + corresponding to the threshold levels.) + + References + ---------- + Gneiting, T. and Ranjan, R. Comparing density forecasts using threshold- + and quantile-weighted scoring rules. J. Bus. Econ. Stat. 29, 411-422 + (2011). http://www.stat.washington.edu/research/reports/2008/tr533.pdf + + See Also + -------- + properscoring.threshold_brier_score + xarray.apply_ufunc + """ + if forecasts.dims != observations.dims: + observations, forecasts = xr.broadcast(observations, forecasts) + return xr.apply_ufunc(threshold_brier_score, + observations, forecasts, threshold, + input_core_dims=[[], [], []], + kwargs={'axis': axis, 'issorted': issorted}, + dask='parallelized', + output_dtypes=[float]) diff --git a/xskillscore/tests/test_probabilistic.py b/xskillscore/tests/test_probabilistic.py index 9e6cd045..0f01a9c7 100644 --- a/xskillscore/tests/test_probabilistic.py +++ b/xskillscore/tests/test_probabilistic.py @@ -2,10 +2,12 @@ import pandas as pd import pytest import xarray as xr -from properscoring import crps_ensemble, crps_gaussian +from properscoring import (brier_score, crps_ensemble, crps_gaussian, + threshold_brier_score) from xarray.tests import assert_allclose, assert_identical -from xskillscore.core.probabilistic import xr_crps_ensemble, xr_crps_gaussian +from xskillscore.core.probabilistic import (xr_crps_ensemble, xr_crps_gaussian, + xr_threshold_brier_score) @pytest.fixture @@ -54,3 +56,16 @@ def test_xr_crps_gaussian_dask(a_dask, b_dask): assert actual.chunks is not None # show that crps_ensemble returns no chunks assert expected.chunks is None + + +def test_xr_threshold_brier_score_dask(a_dask, b_dask): + threshold = .5 + actual = xr_threshold_brier_score(a_dask, b_dask, threshold) + expected = threshold_brier_score(a_dask, b_dask, threshold) + expected = xr.DataArray(expected, coords=a_dask.coords) + # test for numerical identity of xr_threshold and threshold + assert_identical(actual, expected) + # test that xr_crps_ensemble returns chunks + assert actual.chunks is not None + # show that crps_ensemble returns no chunks + assert expected.chunks is None From a9ba98ad7e4df4f03e7690b5d64bc8d99bf5a764 Mon Sep 17 00:00:00 2001 From: AS Date: Mon, 13 May 2019 10:43:44 +0200 Subject: [PATCH 5/7] cosmetics --- xskillscore/core/probabilistic.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/xskillscore/core/probabilistic.py b/xskillscore/core/probabilistic.py index 911139bf..fab2cbfd 100644 --- a/xskillscore/core/probabilistic.py +++ b/xskillscore/core/probabilistic.py @@ -18,7 +18,7 @@ def xr_crps_gaussian(observations, mu, sig): Returns ------- Single value or tuple of Dataset, DataArray, Variable, dask.array.Array or - numpy.ndarray, the first type on that list to appear on an input. + numpy.ndarray, the first type on that list to appear on an input. See Also -------- @@ -30,7 +30,10 @@ def xr_crps_gaussian(observations, mu, sig): observations, mu = xr.broadcast(observations, mu) if sig.dims != observations.dims: observations, sig = xr.broadcast(observations, sig) - return xr.apply_ufunc(crps_gaussian, observations, mu, sig, + return xr.apply_ufunc(crps_gaussian, + observations, + mu, + sig, input_core_dims=[[], [], []], dask='parallelized', output_dtypes=[float]) @@ -59,15 +62,22 @@ def xr_crps_ensemble(observations, forecasts): """ if forecasts.dims != observations.dims: observations, forecasts = xr.broadcast(observations, forecasts) - return xr.apply_ufunc(crps_ensemble, observations, forecasts, + return xr.apply_ufunc(crps_ensemble, + observations, + forecasts, input_core_dims=[[], []], dask='parallelized', output_dtypes=[float]) -def xr_threshold_brier_score(observations, forecasts, threshold, issorted=False, axis=-1): +def xr_threshold_brier_score(observations, + forecasts, + threshold, + issorted=False, + axis=-1): """ - xarray version of properscoring.threshold_brier_score: Calculate the Brier scores of an ensemble for exceeding given thresholds. + xarray version of properscoring.threshold_brier_score: Calculate the Brier + scores of an ensemble for exceeding given thresholds. Parameters ---------- @@ -107,8 +117,13 @@ def xr_threshold_brier_score(observations, forecasts, threshold, issorted=False, if forecasts.dims != observations.dims: observations, forecasts = xr.broadcast(observations, forecasts) return xr.apply_ufunc(threshold_brier_score, - observations, forecasts, threshold, + observations, + forecasts, + threshold, input_core_dims=[[], [], []], - kwargs={'axis': axis, 'issorted': issorted}, + kwargs={ + 'axis': axis, + 'issorted': issorted + }, dask='parallelized', output_dtypes=[float]) From b0d4b973db752b8cda54e1488ff695bbc73332f4 Mon Sep 17 00:00:00 2001 From: Ray Bell Date: Mon, 13 May 2019 22:53:26 -0400 Subject: [PATCH 6/7] clean up --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e5fa22e1..8da9ab81 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ DESCRIPTION = "xskillscore" LONG_DESCRIPTION = """Metrics for verifying forecasts""" URL = 'https://github.com/raybellwaves/xskillscore' -INSTALL_REQUIRES = ['scikit-learn', 'xarray', 'dask', 'scipy','properscoring'] +INSTALL_REQUIRES = ['scikit-learn', 'xarray', 'dask', 'scipy', 'properscoring'] TESTS_REQUIRE = ['pytest'] PYTHON_REQUIRE = '>=3.6' From 90f2c85380eed9edae3f057467b05ec9312ed7d2 Mon Sep 17 00:00:00 2001 From: Ray Bell Date: Mon, 13 May 2019 22:54:02 -0400 Subject: [PATCH 7/7] clean up --- xskillscore/tests/test_probabilistic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xskillscore/tests/test_probabilistic.py b/xskillscore/tests/test_probabilistic.py index 0f01a9c7..63f18608 100644 --- a/xskillscore/tests/test_probabilistic.py +++ b/xskillscore/tests/test_probabilistic.py @@ -2,9 +2,9 @@ import pandas as pd import pytest import xarray as xr -from properscoring import (brier_score, crps_ensemble, crps_gaussian, +from properscoring import (crps_ensemble, crps_gaussian, threshold_brier_score) -from xarray.tests import assert_allclose, assert_identical +from xarray.tests import assert_identical from xskillscore.core.probabilistic import (xr_crps_ensemble, xr_crps_gaussian, xr_threshold_brier_score)