New changes for version 0.78, see changes in change log

BYU-Hydroinformatics · Apr 24, 2019 · 32992bd · 32992bd
1 parent 386b1fc
commit 32992bd
Show file tree

Hide file tree

Showing 7 changed files with 101 additions and 25 deletions.
diff --git a/docs/change_log.rst b/docs/change_log.rst
@@ -4,6 +4,13 @@ Release Notes
 This is the list of changes to Hydrostats between each release. For full details, see the commit logs at
 https://github.com/BYU-Hydroinformatics/Hydrostats.
 
+Version 0.78
+^^^^^^^^^^^^
+- Added the ability to use different thresholds for the ensemble forecast for the observed and ensemble forecast data in
+  the hydrostats.ens_metrics.auroc() and hydrostats.ens_metrics.ens_brier() methods.
+- Changes to documentation to reflect the addition of the .name and .abbr properties to metrics from the HydroErr
+  package.
+
 Version 0.77
 ^^^^^^^^^^^^
 - Added a new rolling average feature to the hydrostats.data.daily_average(). Set rolling=True as a parameter to use the

diff --git a/docs/conf.py b/docs/conf.py
@@ -47,7 +47,7 @@ def __getattr__(cls, name):
 # The short X.Y version
 version = ''
 # The full version, including alpha/beta/rc tags
-release = '0.77'
+release = '0.78'
 
 # -- General configuration ---------------------------------------------------
 

diff --git a/hydrostats/analyze.py b/hydrostats/analyze.py
@@ -37,7 +37,8 @@ def make_table(merged_dataframe, metrics, seasonal_periods=None, mase_m=1, dmod_
     metrics: list of str
         A list of all the metrics that the user wants to calculate. The metrics abbreviations must
         be used (e.g. the abbreviation for the mean error is "ME". Each function has an attribute with the name
-        and abbreviation, so this can be used instead (see example).
+        and abbreviation, so this can be used instead (see example). Also, strings can be typed and found in the
+        quick reference table in this documentation.
 
     seasonal_periods: 2D list of str, optional
         If given, specifies the seasonal periods that the user wants to analyze (e.g. [['06-01',
@@ -132,7 +133,7 @@ def make_table(merged_dataframe, metrics, seasonal_periods=None, mase_m=1, dmod_
 
     Here we make a table and print the results:
 
-    >>> my_metrics = [mae.abbr, r_squared.abbr, nse.abbr, kge_2012.abbr]
+    >>> my_metrics = [mae.abbr, r_squared.abbr, nse.abbr, kge_2012.abbr]  # HydroErr 1.24 or greater is required to use these properties
     >>> seasonal = [['01-01', '03-31'], ['04-01', '06-30'], ['07-01', '09-30'], ['10-01', '12-31']]
     >>> table = ha.make_table(merged_df, my_metrics, seasonal, remove_neg=True, remove_zero=True, location='Magdalena')
     >>> table
@@ -221,7 +222,8 @@ def time_lag(merged_dataframe, metrics, interp_freq='6H', interp_type='pchip',
         with a datetime index.
 
     metrics: list of str
-        Metric abbreviations that the user would like to use in their lag analysis.
+        Metric abbreviations that the user would like to use in their lag analysis. Each metric function has a property
+        that contains their abbreviation for convenience (see example).
 
     interp_freq: str, optional
         Frequency of the interpolation for both the simulated and observed time series.
@@ -324,7 +326,7 @@ def time_lag(merged_dataframe, metrics, interp_freq='6H', interp_type='pchip',
 
     >>> import hydrostats.analyze as ha
     >>> import hydrostats.data as hd
-    >>> pd.options.display.max_columns = 50
+    >>> from hydrostats.metrics import me, r_squared, rmse, kge_2012, nse, dr
     >>>
     >>> # Defining the URLs of the datasets
     >>> sfpt_url = r'https://github.com/waderoberts123/Hydrostats/raw/master/Sample_data/sfpt_data/magdalena-calamar_interim_data.csv'
@@ -334,8 +336,8 @@ def time_lag(merged_dataframe, metrics, interp_freq='6H', interp_type='pchip',
 
     There are two dataframes that are returned as part of the analysis.
 
-    >>> # Running the lag analysis
-    >>> time_lag_df, summary_df = ha.time_lag(merged_df, metrics=['ME', 'r2', 'RMSE', 'KGE (2012)', 'NSE'])
+    >>> # Running the lag analysis, not that HydroErr > 1.24 must be used to access the .abbr property
+    >>> time_lag_df, summary_df = ha.time_lag(merged_df, metrics=[me.abbr, r_squared.abbr, rmse.abbr, kge_2012.abbr, nse.abbr])
     >>> summary_df
                         Max  Max Lag Number          Min  Min Lag Number
     ME           174.740510           -28.0   174.740510           -24.0
@@ -347,7 +349,7 @@ def time_lag(merged_dataframe, metrics, interp_freq='6H', interp_type='pchip',
     A plot can be created that visualizes the different metrics throughout the time lags. It can be
     saved using the savefig parameter as well if desired.
 
-    >>> _, _ = ha.time_lag(merged_df, metrics=['r2', 'KGE (2012)', 'dr'], plot=True)
+    >>> _, _ = ha.time_lag(merged_df, metrics=[r_squared.abbr, kge_2012.abbr, dr.abbr], plot=True)
 
     .. image:: /Figures/lag_plot1.png
 

diff --git a/hydrostats/ens_metrics.py b/hydrostats/ens_metrics.py
@@ -819,7 +819,8 @@ def crps_kernel(obs, fcst_ens, remove_neg=False, remove_zero=False):
     return output
 
 
-def ens_brier(fcst_ens=None, obs=None, threshold=None, fcst_ens_bin=None, obs_bin=None, adj=None):
+def ens_brier(fcst_ens=None, obs=None, threshold=None, ens_threshold=None, obs_threshold=None, fcst_ens_bin=None,
+              obs_bin=None, adj=None):
     """
     Calculate the ensemble-adjusted Brier Score.
 
@@ -838,6 +839,14 @@ def ens_brier(fcst_ens=None, obs=None, threshold=None, fcst_ens_bin=None, obs_bi
         The threshold for an event (e.g. if the event is a 100 year flood, the streamflow value
         that a 100 year flood would have to exceed.
 
+    ens_threshold: float
+        If different threshholds for the ensemble forecast and the observed data is desired, then this parameter can be
+        set along with the 'obs_threshold' parameter to set different thresholds.
+
+    obs_threshold: float
+        If different threshholds for the ensemble forecast and the observed data is desired, then this parameter can be
+        set along with the 'ens_threshold' parameter to set different thresholds.
+
     fcst_ens_bin: 1D ndarray
         Binary array of observations for each start date. 1 for an event and 0 for a non-event.
 
@@ -901,15 +910,31 @@ def ens_brier(fcst_ens=None, obs=None, threshold=None, fcst_ens_bin=None, obs_bi
       https://CRAN.R-project.org/package=SpecsVerification
     """
 
-    if obs_bin is not None and fcst_ens_bin is not None:
+    # User supplied the binary matrices
+    if obs_bin is not None and fcst_ens_bin is not None and fcst_ens is None and obs is None \
+            and threshold is None and ens_threshold is None and obs_threshold is None:
+
         pass
-    elif fcst_ens is not None and obs is not None and threshold is not None:
-        # Convert the observed data and forecast data to binary data.
+
+    # User supplied normal matrices with a threshold value to apply to each of them
+    elif obs_bin is None and fcst_ens_bin is None and fcst_ens is not None and obs is not None \
+            and threshold is not None and ens_threshold is None and obs_threshold is None:
+
+        # Convert the observed data and forecast data to binary data
         obs_bin = (obs > threshold).astype(np.int)
         fcst_ens_bin = (fcst_ens > threshold).astype(np.int)
+
+    # User supplied normal matrices with different thresholds for the forecast ensemble and the observed data
+    elif obs_bin is None and fcst_ens_bin is None and fcst_ens is not None and obs is not None \
+            and threshold is None and ens_threshold is not None and obs_threshold is not None:
+
+        # Convert the observed data and forecast data to binary data
+        obs_bin = (obs > obs_threshold).astype(np.int)
+        fcst_ens_bin = (fcst_ens > ens_threshold).astype(np.int)
+
     else:
-        raise RuntimeError(" You must either supply fcst_ens, obs, and threshold or you must "
-                           "supply fcst_ens_bin and obs_bin.")
+        raise RuntimeError(" You must either supply fcst_ens, obs, and threshold (or obs_threshold and ens_threshold "
+                           "if there are different thresholds) or you must supply fcst_ens_bin and obs_bin.")
 
     # Treat missing data and warn users of columns being removed
     obs_bin, fcst_ens_bin = treat_data(obs_bin, fcst_ens_bin, remove_neg=False,
@@ -933,7 +958,8 @@ def ens_brier(fcst_ens=None, obs=None, threshold=None, fcst_ens_bin=None, obs_bi
     return br
 
 
-def auroc(fcst_ens=None, obs=None, threshold=None, fcst_ens_bin=None, obs_bin=None):
+def auroc(fcst_ens=None, obs=None, threshold=None, ens_threshold=None, obs_threshold=None,
+          fcst_ens_bin=None, obs_bin=None):
     """
     Calculates Area Under the Relative Operating Characteristic curve (AUROC)
     for a forecast and its verifying binary observation, and estimates the variance of the AUROC
@@ -953,6 +979,14 @@ def auroc(fcst_ens=None, obs=None, threshold=None, fcst_ens_bin=None, obs_bin=No
         The threshold for an event (e.g. if the event is a 100 year flood, the streamflow value
         that a 100 year flood would have to exceed.
 
+    ens_threshold: float
+        If different threshholds for the ensemble forecast and the observed data is desired, then this parameter can be
+        set along with the 'obs_threshold' parameter to set different thresholds.
+
+    obs_threshold: float
+        If different threshholds for the ensemble forecast and the observed data is desired, then this parameter can be
+        set along with the 'ens_threshold' parameter to set different thresholds.
+
     fcst_ens_bin: 1D ndarray
         Binary array of observations for each start date. 1 for an event and 0 for a non-event.
 
@@ -1014,16 +1048,31 @@ def auroc(fcst_ens=None, obs=None, threshold=None, fcst_ens_bin=None, obs_bin=No
       https://CRAN.R-project.org/package=SpecsVerification
 
     """
-    if obs_bin is not None and fcst_ens_bin is not None and fcst_ens is None and obs is None and threshold is None:
+    # User supplied the binary matrices
+    if obs_bin is not None and fcst_ens_bin is not None and fcst_ens is None and obs is None \
+            and threshold is None and ens_threshold is None and obs_threshold is None:
+
         pass
-    elif fcst_ens is not None and obs is not None and threshold is not None and obs_bin is None and \
-            fcst_ens_bin is None:
-        # Convert the observed data and forecast data to binary data.
+
+    # User supplied normal matrices with a threshold value to apply to each of them
+    elif obs_bin is None and fcst_ens_bin is None and fcst_ens is not None and obs is not None \
+            and threshold is not None and ens_threshold is None and obs_threshold is None:
+
+        # Convert the observed data and forecast data to binary data
         obs_bin = (obs > threshold).astype(np.int)
         fcst_ens_bin = (fcst_ens > threshold).astype(np.int)
+
+    # User supplied normal matrices with different thresholds for the forecast ensemble and the observed data
+    elif obs_bin is None and fcst_ens_bin is None and fcst_ens is not None and obs is not None \
+            and threshold is None and ens_threshold is not None and obs_threshold is not None:
+
+        # Convert the observed data and forecast data to binary data
+        obs_bin = (obs > obs_threshold).astype(np.int)
+        fcst_ens_bin = (fcst_ens > ens_threshold).astype(np.int)
+
     else:
-        raise RuntimeError(" You must either supply fcst_ens, obs, and threshold or you must "
-                           "supply fcst_ens_bin and obs_bin.")
+        raise RuntimeError(" You must either supply fcst_ens, obs, and threshold (or obs_threshold and ens_threshold "
+                           "if there are different thresholds) or you must supply fcst_ens_bin and obs_bin.")
 
     obs_bin, fcst_ens_bin = treat_data(obs_bin, fcst_ens_bin, remove_neg=False, remove_zero=False)
 
@@ -1151,9 +1200,11 @@ def skill_score(scores, bench_scores, perf_score, eff_sample_size=None, remove_n
                 all_nan_indices = np.logical_and(nan_indices_fcst, nan_indices_obs)
                 all_treatment_array = np.logical_and(all_treatment_array, all_nan_indices)
 
-                warnings.warn("Row(s) {} contained NaN values and the row(s) have been "
-                              "removed for the calculation (Rows are zero indexed).".format(np.where(~all_nan_indices)[0]),
-                              UserWarning)
+                warnings.warn(
+                    "Row(s) {} contained NaN values and the row(s) have been removed for the calculation (Rows are "
+                    "zero indexed).".format(np.where(~all_nan_indices)[0]),
+                    UserWarning
+                )
 
             if np.any(np.isinf(scores_copy)) or np.any(np.isinf(bench_scores_copy)):
                 inf_indices_fcst = ~(np.isinf(scores_copy))

diff --git a/hydrostats/tests/Files_for_tests/expected_brier_diff_thresh.npy b/hydrostats/tests/Files_for_tests/expected_brier_diff_thresh.npy
diff --git a/hydrostats/tests/tests.py b/hydrostats/tests/tests.py
@@ -252,14 +252,30 @@ def test_ens_brier(self):
         np.testing.assert_allclose(expected_scores, brier_scores_test)
         self.assertAlmostEqual(expected_mean_score, brier_scores_test.mean())
 
+        # With the data array and different thresholds
+        expected_scores_diff_thresh = np.load("Files_for_tests/expected_brier_diff_thresh.npy")
+        expected_scores_diff_thresh_mean = 0.181926775147929
+
+        brier_scores_test_diff_thresh = em.ens_brier(
+            fcst_ens=self.ensemble_array, obs=self.observed_array, obs_threshold=180, ens_threshold=170,
+        )
+
+        np.testing.assert_allclose(expected_scores_diff_thresh, brier_scores_test_diff_thresh)
+        self.assertAlmostEqual(expected_scores_diff_thresh_mean, brier_scores_test_diff_thresh.mean())
+
     def test_auroc(self):
         auroc_expected = np.array([0.45599759, 0.07259804])
         auroc_expected_bin = np.array([0.43596949, 0.05864427])
+        auroc_expected_diff_thresh = np.array([0.3812537673297167, 0.06451017097609267])
 
         auroc_test = em.auroc(fcst_ens=self.ensemble_array, obs=self.observed_array, threshold=180)
+        auroc_test_diff_thresh = em.auroc(
+            fcst_ens=self.ensemble_array, obs=self.observed_array, obs_threshold=180, ens_threshold=170,
+        )
         auroc_test_bin = em.auroc(fcst_ens_bin=self.ens_bin, obs_bin=self.obs_bin)
 
         np.testing.assert_allclose(auroc_expected, auroc_test)
+        np.testing.assert_allclose(auroc_expected_diff_thresh, auroc_test_diff_thresh)
         np.testing.assert_allclose(auroc_expected_bin, auroc_test_bin)
 
     def test_skill_score(self):

diff --git a/setup.py b/setup.py
@@ -8,7 +8,7 @@
 setup(
     name='hydrostats',
     packages=['hydrostats'],
-    version='0.77',
+    version='0.78',
     description='Tools for use in comparison studies, specifically for use in the field '
                 'of hydrology',
     long_description=README,