Skip to content

Commit

Permalink
New changes for version 0.78, see changes in change log
Browse files Browse the repository at this point in the history
  • Loading branch information
wade committed Apr 24, 2019
1 parent 386b1fc commit 32992bd
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 25 deletions.
7 changes: 7 additions & 0 deletions docs/change_log.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ Release Notes
This is the list of changes to Hydrostats between each release. For full details, see the commit logs at
https://github.com/BYU-Hydroinformatics/Hydrostats.

Version 0.78
^^^^^^^^^^^^
- Added the ability to use different thresholds for the ensemble forecast for the observed and ensemble forecast data in
the hydrostats.ens_metrics.auroc() and hydrostats.ens_metrics.ens_brier() methods.
- Changes to documentation to reflect the addition of the .name and .abbr properties to metrics from the HydroErr
package.

Version 0.77
^^^^^^^^^^^^
- Added a new rolling average feature to the hydrostats.data.daily_average(). Set rolling=True as a parameter to use the
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def __getattr__(cls, name):
# The short X.Y version
version = ''
# The full version, including alpha/beta/rc tags
release = '0.77'
release = '0.78'

# -- General configuration ---------------------------------------------------

Expand Down
16 changes: 9 additions & 7 deletions hydrostats/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def make_table(merged_dataframe, metrics, seasonal_periods=None, mase_m=1, dmod_
metrics: list of str
A list of all the metrics that the user wants to calculate. The metrics abbreviations must
be used (e.g. the abbreviation for the mean error is "ME". Each function has an attribute with the name
and abbreviation, so this can be used instead (see example).
and abbreviation, so this can be used instead (see example). Also, strings can be typed and found in the
quick reference table in this documentation.
seasonal_periods: 2D list of str, optional
If given, specifies the seasonal periods that the user wants to analyze (e.g. [['06-01',
Expand Down Expand Up @@ -132,7 +133,7 @@ def make_table(merged_dataframe, metrics, seasonal_periods=None, mase_m=1, dmod_
Here we make a table and print the results:
>>> my_metrics = [mae.abbr, r_squared.abbr, nse.abbr, kge_2012.abbr]
>>> my_metrics = [mae.abbr, r_squared.abbr, nse.abbr, kge_2012.abbr] # HydroErr 1.24 or greater is required to use these properties
>>> seasonal = [['01-01', '03-31'], ['04-01', '06-30'], ['07-01', '09-30'], ['10-01', '12-31']]
>>> table = ha.make_table(merged_df, my_metrics, seasonal, remove_neg=True, remove_zero=True, location='Magdalena')
>>> table
Expand Down Expand Up @@ -221,7 +222,8 @@ def time_lag(merged_dataframe, metrics, interp_freq='6H', interp_type='pchip',
with a datetime index.
metrics: list of str
Metric abbreviations that the user would like to use in their lag analysis.
Metric abbreviations that the user would like to use in their lag analysis. Each metric function has a property
that contains their abbreviation for convenience (see example).
interp_freq: str, optional
Frequency of the interpolation for both the simulated and observed time series.
Expand Down Expand Up @@ -324,7 +326,7 @@ def time_lag(merged_dataframe, metrics, interp_freq='6H', interp_type='pchip',
>>> import hydrostats.analyze as ha
>>> import hydrostats.data as hd
>>> pd.options.display.max_columns = 50
>>> from hydrostats.metrics import me, r_squared, rmse, kge_2012, nse, dr
>>>
>>> # Defining the URLs of the datasets
>>> sfpt_url = r'https://github.com/waderoberts123/Hydrostats/raw/master/Sample_data/sfpt_data/magdalena-calamar_interim_data.csv'
Expand All @@ -334,8 +336,8 @@ def time_lag(merged_dataframe, metrics, interp_freq='6H', interp_type='pchip',
There are two dataframes that are returned as part of the analysis.
>>> # Running the lag analysis
>>> time_lag_df, summary_df = ha.time_lag(merged_df, metrics=['ME', 'r2', 'RMSE', 'KGE (2012)', 'NSE'])
>>> # Running the lag analysis, not that HydroErr > 1.24 must be used to access the .abbr property
>>> time_lag_df, summary_df = ha.time_lag(merged_df, metrics=[me.abbr, r_squared.abbr, rmse.abbr, kge_2012.abbr, nse.abbr])
>>> summary_df
Max Max Lag Number Min Min Lag Number
ME 174.740510 -28.0 174.740510 -24.0
Expand All @@ -347,7 +349,7 @@ def time_lag(merged_dataframe, metrics, interp_freq='6H', interp_type='pchip',
A plot can be created that visualizes the different metrics throughout the time lags. It can be
saved using the savefig parameter as well if desired.
>>> _, _ = ha.time_lag(merged_df, metrics=['r2', 'KGE (2012)', 'dr'], plot=True)
>>> _, _ = ha.time_lag(merged_df, metrics=[r_squared.abbr, kge_2012.abbr, dr.abbr], plot=True)
.. image:: /Figures/lag_plot1.png
Expand Down
83 changes: 67 additions & 16 deletions hydrostats/ens_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -819,7 +819,8 @@ def crps_kernel(obs, fcst_ens, remove_neg=False, remove_zero=False):
return output


def ens_brier(fcst_ens=None, obs=None, threshold=None, fcst_ens_bin=None, obs_bin=None, adj=None):
def ens_brier(fcst_ens=None, obs=None, threshold=None, ens_threshold=None, obs_threshold=None, fcst_ens_bin=None,
obs_bin=None, adj=None):
"""
Calculate the ensemble-adjusted Brier Score.
Expand All @@ -838,6 +839,14 @@ def ens_brier(fcst_ens=None, obs=None, threshold=None, fcst_ens_bin=None, obs_bi
The threshold for an event (e.g. if the event is a 100 year flood, the streamflow value
that a 100 year flood would have to exceed.
ens_threshold: float
If different threshholds for the ensemble forecast and the observed data is desired, then this parameter can be
set along with the 'obs_threshold' parameter to set different thresholds.
obs_threshold: float
If different threshholds for the ensemble forecast and the observed data is desired, then this parameter can be
set along with the 'ens_threshold' parameter to set different thresholds.
fcst_ens_bin: 1D ndarray
Binary array of observations for each start date. 1 for an event and 0 for a non-event.
Expand Down Expand Up @@ -901,15 +910,31 @@ def ens_brier(fcst_ens=None, obs=None, threshold=None, fcst_ens_bin=None, obs_bi
https://CRAN.R-project.org/package=SpecsVerification
"""

if obs_bin is not None and fcst_ens_bin is not None:
# User supplied the binary matrices
if obs_bin is not None and fcst_ens_bin is not None and fcst_ens is None and obs is None \
and threshold is None and ens_threshold is None and obs_threshold is None:

pass
elif fcst_ens is not None and obs is not None and threshold is not None:
# Convert the observed data and forecast data to binary data.

# User supplied normal matrices with a threshold value to apply to each of them
elif obs_bin is None and fcst_ens_bin is None and fcst_ens is not None and obs is not None \
and threshold is not None and ens_threshold is None and obs_threshold is None:

# Convert the observed data and forecast data to binary data
obs_bin = (obs > threshold).astype(np.int)
fcst_ens_bin = (fcst_ens > threshold).astype(np.int)

# User supplied normal matrices with different thresholds for the forecast ensemble and the observed data
elif obs_bin is None and fcst_ens_bin is None and fcst_ens is not None and obs is not None \
and threshold is None and ens_threshold is not None and obs_threshold is not None:

# Convert the observed data and forecast data to binary data
obs_bin = (obs > obs_threshold).astype(np.int)
fcst_ens_bin = (fcst_ens > ens_threshold).astype(np.int)

else:
raise RuntimeError(" You must either supply fcst_ens, obs, and threshold or you must "
"supply fcst_ens_bin and obs_bin.")
raise RuntimeError(" You must either supply fcst_ens, obs, and threshold (or obs_threshold and ens_threshold "
"if there are different thresholds) or you must supply fcst_ens_bin and obs_bin.")

# Treat missing data and warn users of columns being removed
obs_bin, fcst_ens_bin = treat_data(obs_bin, fcst_ens_bin, remove_neg=False,
Expand All @@ -933,7 +958,8 @@ def ens_brier(fcst_ens=None, obs=None, threshold=None, fcst_ens_bin=None, obs_bi
return br


def auroc(fcst_ens=None, obs=None, threshold=None, fcst_ens_bin=None, obs_bin=None):
def auroc(fcst_ens=None, obs=None, threshold=None, ens_threshold=None, obs_threshold=None,
fcst_ens_bin=None, obs_bin=None):
"""
Calculates Area Under the Relative Operating Characteristic curve (AUROC)
for a forecast and its verifying binary observation, and estimates the variance of the AUROC
Expand All @@ -953,6 +979,14 @@ def auroc(fcst_ens=None, obs=None, threshold=None, fcst_ens_bin=None, obs_bin=No
The threshold for an event (e.g. if the event is a 100 year flood, the streamflow value
that a 100 year flood would have to exceed.
ens_threshold: float
If different threshholds for the ensemble forecast and the observed data is desired, then this parameter can be
set along with the 'obs_threshold' parameter to set different thresholds.
obs_threshold: float
If different threshholds for the ensemble forecast and the observed data is desired, then this parameter can be
set along with the 'ens_threshold' parameter to set different thresholds.
fcst_ens_bin: 1D ndarray
Binary array of observations for each start date. 1 for an event and 0 for a non-event.
Expand Down Expand Up @@ -1014,16 +1048,31 @@ def auroc(fcst_ens=None, obs=None, threshold=None, fcst_ens_bin=None, obs_bin=No
https://CRAN.R-project.org/package=SpecsVerification
"""
if obs_bin is not None and fcst_ens_bin is not None and fcst_ens is None and obs is None and threshold is None:
# User supplied the binary matrices
if obs_bin is not None and fcst_ens_bin is not None and fcst_ens is None and obs is None \
and threshold is None and ens_threshold is None and obs_threshold is None:

pass
elif fcst_ens is not None and obs is not None and threshold is not None and obs_bin is None and \
fcst_ens_bin is None:
# Convert the observed data and forecast data to binary data.

# User supplied normal matrices with a threshold value to apply to each of them
elif obs_bin is None and fcst_ens_bin is None and fcst_ens is not None and obs is not None \
and threshold is not None and ens_threshold is None and obs_threshold is None:

# Convert the observed data and forecast data to binary data
obs_bin = (obs > threshold).astype(np.int)
fcst_ens_bin = (fcst_ens > threshold).astype(np.int)

# User supplied normal matrices with different thresholds for the forecast ensemble and the observed data
elif obs_bin is None and fcst_ens_bin is None and fcst_ens is not None and obs is not None \
and threshold is None and ens_threshold is not None and obs_threshold is not None:

# Convert the observed data and forecast data to binary data
obs_bin = (obs > obs_threshold).astype(np.int)
fcst_ens_bin = (fcst_ens > ens_threshold).astype(np.int)

else:
raise RuntimeError(" You must either supply fcst_ens, obs, and threshold or you must "
"supply fcst_ens_bin and obs_bin.")
raise RuntimeError(" You must either supply fcst_ens, obs, and threshold (or obs_threshold and ens_threshold "
"if there are different thresholds) or you must supply fcst_ens_bin and obs_bin.")

obs_bin, fcst_ens_bin = treat_data(obs_bin, fcst_ens_bin, remove_neg=False, remove_zero=False)

Expand Down Expand Up @@ -1151,9 +1200,11 @@ def skill_score(scores, bench_scores, perf_score, eff_sample_size=None, remove_n
all_nan_indices = np.logical_and(nan_indices_fcst, nan_indices_obs)
all_treatment_array = np.logical_and(all_treatment_array, all_nan_indices)

warnings.warn("Row(s) {} contained NaN values and the row(s) have been "
"removed for the calculation (Rows are zero indexed).".format(np.where(~all_nan_indices)[0]),
UserWarning)
warnings.warn(
"Row(s) {} contained NaN values and the row(s) have been removed for the calculation (Rows are "
"zero indexed).".format(np.where(~all_nan_indices)[0]),
UserWarning
)

if np.any(np.isinf(scores_copy)) or np.any(np.isinf(bench_scores_copy)):
inf_indices_fcst = ~(np.isinf(scores_copy))
Expand Down
Binary file not shown.
16 changes: 16 additions & 0 deletions hydrostats/tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,14 +252,30 @@ def test_ens_brier(self):
np.testing.assert_allclose(expected_scores, brier_scores_test)
self.assertAlmostEqual(expected_mean_score, brier_scores_test.mean())

# With the data array and different thresholds
expected_scores_diff_thresh = np.load("Files_for_tests/expected_brier_diff_thresh.npy")
expected_scores_diff_thresh_mean = 0.181926775147929

brier_scores_test_diff_thresh = em.ens_brier(
fcst_ens=self.ensemble_array, obs=self.observed_array, obs_threshold=180, ens_threshold=170,
)

np.testing.assert_allclose(expected_scores_diff_thresh, brier_scores_test_diff_thresh)
self.assertAlmostEqual(expected_scores_diff_thresh_mean, brier_scores_test_diff_thresh.mean())

def test_auroc(self):
auroc_expected = np.array([0.45599759, 0.07259804])
auroc_expected_bin = np.array([0.43596949, 0.05864427])
auroc_expected_diff_thresh = np.array([0.3812537673297167, 0.06451017097609267])

auroc_test = em.auroc(fcst_ens=self.ensemble_array, obs=self.observed_array, threshold=180)
auroc_test_diff_thresh = em.auroc(
fcst_ens=self.ensemble_array, obs=self.observed_array, obs_threshold=180, ens_threshold=170,
)
auroc_test_bin = em.auroc(fcst_ens_bin=self.ens_bin, obs_bin=self.obs_bin)

np.testing.assert_allclose(auroc_expected, auroc_test)
np.testing.assert_allclose(auroc_expected_diff_thresh, auroc_test_diff_thresh)
np.testing.assert_allclose(auroc_expected_bin, auroc_test_bin)

def test_skill_score(self):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
setup(
name='hydrostats',
packages=['hydrostats'],
version='0.77',
version='0.78',
description='Tools for use in comparison studies, specifically for use in the field '
'of hydrology',
long_description=README,
Expand Down

0 comments on commit 32992bd

Please sign in to comment.