Adds method called get_event_counts to CatalogForecast and adds arguments to plotting functions (#146)

wsavran · web-flow · commit 6d0bd2c4052b · 2021-10-31T12:21:59.000-07:00
* added verbose option to catalog-based tests
* added get_event_counts method to CatalogForecast
* added plotting arguments for more granular customization
* added test for get_event_counts
diff --git a/csep/core/catalog_evaluations.py b/csep/core/catalog_evaluations.py
@@ -1,4 +1,6 @@
 # Third-Party Imports
+import time
+
 import numpy
 import scipy.stats
 
@@ -15,7 +17,7 @@
 from csep.utils.stats import get_quantiles, cumulative_square_diff
 
 
-def number_test(forecast, observed_catalog):
+def number_test(forecast, observed_catalog, verbose=True):
     """ Performs the number test on a catalog-based forecast.
 
     The number test builds an empirical distribution of the event counts for each data. By default, this
@@ -30,7 +32,14 @@ def number_test(forecast, observed_catalog):
         evaluation result (:class:`csep.models.EvaluationResult`): evaluation result
     """
     event_counts = []
-    for catalog in forecast:
+    t0 = time.time()
+    for i, catalog in enumerate(forecast):
+        # output status
+        if verbose:
+            tens_exp = numpy.floor(numpy.log10(i + 1))
+            if (i + 1) % 10 ** tens_exp == 0:
+                t1 = time.time()
+                print(f'Processed {i + 1} catalogs in {t1 - t0} seconds', flush=True)
         event_counts.append(catalog.event_count)
     obs_count = observed_catalog.event_count
     delta_1, delta_2 = get_quantiles(event_counts, obs_count)
@@ -46,7 +55,7 @@ def number_test(forecast, observed_catalog):
                                      obs_name=observed_catalog.name)
     return result
 
-def spatial_test(forecast, observed_catalog):
+def spatial_test(forecast, observed_catalog, verbose=True):
     """ Performs spatial test for catalog-based forecasts.
 
 
@@ -70,7 +79,7 @@ def spatial_test(forecast, observed_catalog):
 
     # compute expected rates for forecast if needed
     if forecast.expected_rates is None:
-        forecast.get_expected_rates()
+        forecast.get_expected_rates(verbose=verbose)
 
     expected_cond_count = forecast.expected_rates.sum()
     forecast_mean_spatial_rates = forecast.expected_rates.spatial_counts()
@@ -81,10 +90,17 @@ def spatial_test(forecast, observed_catalog):
     n_obs = numpy.sum(gridded_obs)
 
     # iterate through catalogs in forecast and compute likelihood
-    for catalog in forecast:
+    t0 = time.time()
+    for i, catalog in enumerate(forecast):
         gridded_cat = catalog.spatial_counts()
         _, lh_norm = _compute_likelihood(gridded_cat, forecast_mean_spatial_rates, expected_cond_count, n_obs)
         test_distribution.append(lh_norm)
+        # output status
+        if verbose:
+            tens_exp = numpy.floor(numpy.log10(i + 1))
+            if (i + 1) % 10 ** tens_exp == 0:
+                t1 = time.time()
+                print(f'Processed {i + 1} catalogs in {t1 - t0} seconds', flush=True)
 
     _, obs_lh_norm = _compute_likelihood(gridded_obs, forecast_mean_spatial_rates, expected_cond_count, n_obs)
     # if obs_lh is -numpy.inf, recompute but only for indexes where obs and simulated are non-zero
@@ -124,7 +140,7 @@ def spatial_test(forecast, observed_catalog):
 
     return result
 
-def magnitude_test(forecast, observed_catalog):
+def magnitude_test(forecast, observed_catalog, verbose=True):
     """ Performs magnitude test for catalog-based forecasts """
     test_distribution = []
 
@@ -149,7 +165,7 @@ def magnitude_test(forecast, observed_catalog):
 
     # compute expected rates for forecast if needed
     if forecast.expected_rates is None:
-        forecast.get_expected_rates()
+        forecast.get_expected_rates(verbose=verbose)
 
     # returns the average events in the magnitude bins
     union_histogram = forecast.expected_rates.magnitude_counts()
@@ -160,7 +176,8 @@ def magnitude_test(forecast, observed_catalog):
     scaled_union_histogram = union_histogram * union_scale
 
     # compute the test statistic for each catalog
-    for catalog in forecast:
+    t0 = time.time()
+    for i, catalog in enumerate(forecast):
         mag_counts = catalog.magnitude_counts()
         n_events = numpy.sum(mag_counts)
         if n_events == 0:
@@ -172,6 +189,12 @@ def magnitude_test(forecast, observed_catalog):
         test_distribution.append(
             cumulative_square_diff(numpy.log10(catalog_histogram + 1), numpy.log10(scaled_union_histogram + 1))
         )
+        # output status
+        if verbose:
+            tens_exp = numpy.floor(numpy.log10(i + 1))
+            if (i + 1) % 10 ** tens_exp == 0:
+                t1 = time.time()
+                print(f'Processed {i + 1} catalogs in {t1 - t0} seconds', flush=True)
 
     # compute observed statistic
     obs_d_statistic = cumulative_square_diff(numpy.log10(obs_histogram + 1), numpy.log10(scaled_union_histogram + 1))
@@ -192,7 +215,7 @@ def magnitude_test(forecast, observed_catalog):
 
     return result
 
-def pseudolikelihood_test(forecast, observed_catalog):
+def pseudolikelihood_test(forecast, observed_catalog, verbose=True):
     """ Performs the spatial pseudolikelihood test for catalog forecasts.
 
     Performs the spatial pseudolikelihood test as described by Savran et al., 2020. The tests uses a pseudolikelihood
@@ -216,7 +239,7 @@ def pseudolikelihood_test(forecast, observed_catalog):
 
     # compute expected rates for forecast if needed
     if forecast.expected_rates is None:
-        _ = forecast.get_expected_rates()
+        _ = forecast.get_expected_rates(verbose=verbose)
 
     expected_cond_count = forecast.expected_rates.sum()
     forecast_mean_spatial_rates = forecast.expected_rates.spatial_counts()
@@ -226,10 +249,17 @@ def pseudolikelihood_test(forecast, observed_catalog):
     gridded_obs = observed_catalog.spatial_counts()
     n_obs = numpy.sum(gridded_obs)
 
-    for catalog in forecast:
+    t0 = time.time()
+    for i, catalog in enumerate(forecast):
         gridded_cat = catalog.spatial_counts()
         plh, _ = _compute_likelihood(gridded_cat, forecast_mean_spatial_rates, expected_cond_count, n_obs)
         test_distribution.append(plh)
+        # output status
+        if verbose:
+            tens_exp = numpy.floor(numpy.log10(i + 1))
+            if (i + 1) % 10 ** tens_exp == 0:
+                t1 = time.time()
+                print(f'Processed {i + 1} catalogs in {t1 - t0} seconds', flush=True)
 
     obs_plh, _ = _compute_likelihood(gridded_obs, forecast_mean_spatial_rates, expected_cond_count, n_obs)
     # if obs_lh is -numpy.inf, recompute but only for indexes where obs and simulated are non-zero
diff --git a/csep/core/forecasts.py b/csep/core/forecasts.py
@@ -535,6 +535,8 @@ def __init__(self, filename=None, catalogs=None, name=None,
         # should be a MarkedGriddedDataSet
         self.expected_rates = expected_rates
 
+        self._event_counts = []
+
         # defines the space, time, and magnitude region of the forecasts
         self.region = region
 
@@ -606,6 +608,8 @@ def __next__(self):
             if self.filter_spatial:
                 catalog = catalog.filter_spatial(self.region)
 
+        self._event_counts.append(catalog.event_count)
+
         if is_generator and self.store:
             self._catalogs.append(catalog)
 
@@ -647,6 +651,22 @@ def magnitude_counts(self):
         else:
             return None
 
+    def get_event_counts(self):
+        """ Returns a numpy array containing the number of events in each catalog.
+
+            Note: This function can take a while if the forecast has not been iterated over yet, because the event
+            counts are collected while iterating and the catalogs must first be read (e.g., from disk). This mainly
+            affects large forecasts initialized with store = False, and only the first time the forecast is iterated.
+
+            Returns:
+                (numpy.array): event counts, with size equal to the number of catalogs in the forecast
+        """
+        if len(self._event_counts) == 0:
+            # event counts is filled while iterating over the catalog
+            for _ in self:
+                pass
+        return numpy.array(self._event_counts)
+
     def get_expected_rates(self, verbose=False):
         """ Compute the expected rates in space-magnitude bins
 
diff --git a/csep/utils/plots.py b/csep/utils/plots.py
@@ -696,6 +696,7 @@ def plot_catalog(catalog, ax=None, show=False, extent=None, set_global=False, pl
     # scatter properties
     markersize = plot_args.get('markersize', 2)
     markercolor = plot_args.get('markercolor', 'blue')
+    markeredgecolor = plot_args.get('markeredgecolor', 'black')
     alpha = plot_args.get('alpha', 1)
     mag_scale = plot_args.get('mag_scale', 1)
     legend = plot_args.get('legend', False)
@@ -771,6 +772,7 @@ def size_map(markersize, values, scale):
                            s=size_map(markersize, catalog.get_magnitudes(), mag_scale),
                            transform=cartopy.crs.PlateCarree(),
                            color=markercolor,
+                           edgecolors=markeredgecolor,
                            alpha=alpha)
 
     # Legend
@@ -1345,6 +1347,9 @@ def plot_comparison_test(results_t, results_w=None, axes=None, plot_args=None):
     linewidth = plot_args.get('linewidth', 1)
     markersize = plot_args.get('markersize', 2)
     percentile = plot_args.get('percentile', 95)
+    xticklabels_rotation = plot_args.get('xticklabels_rotation', 90)
+    xlabel_fontsize = plot_args.get('xlabel_fontsize', 12)
+    ylabel_fontsize = plot_args.get('ylabel_fontsize', 12)
 
     if axes is None:
         fig, ax = pyplot.subplots(figsize=figsize)
@@ -1377,10 +1382,10 @@ def plot_comparison_test(results_t, results_w=None, axes=None, plot_args=None):
             facecolor = 'white'
         ax.plot(index, result_t.observed_statistic, marker='o', markerfacecolor=facecolor, markeredgecolor=color, markersize=markersize)
 
-    ax.set_xticklabels([res.sim_name[0] for res in results_t], rotation=90)
+    ax.set_xticklabels([res.sim_name[0] for res in results_t], rotation=xticklabels_rotation)
     ax.set_xticks(numpy.arange(len(results_t)))
-    ax.set_xlabel(xlabel)
-    ax.set_ylabel(ylabel)
+    ax.set_xlabel(xlabel, fontsize=xlabel_fontsize)
+    ax.set_ylabel(ylabel, fontsize=ylabel_fontsize)
     ax.set_title(title)
     ax.yaxis.grid()
     xTickPos = ax.get_xticks()
diff --git a/tests/test_forecast.py b/tests/test_forecast.py
@@ -51,8 +51,10 @@ def test_ascii_some_missing_verbose(self):
         self.assertEqual(10, test_fore.n_cat)
         numpy.testing.assert_array_equal([cat.catalog_id for cat in test_fore], numpy.arange(10))
 
-    def test_ascii_all_present(self):
-        pass
+    def test_get_event_counts(self):
+        fname = os.path.join(get_test_catalog_root(), 'all_present.csv')
+        test_fore = load_catalog_forecast(fname)
+        numpy.testing.assert_array_equal(numpy.ones(10), test_fore.get_event_counts())
 
 if __name__ == '__main__':
     unittest.main()