
Commit 76844eb

Merge branch 'v0.6.1-release-branch' of github.com:SCECcode/pycsep into v0.6.1-release-branch
2 parents 7142fbb + 09e2769

9 files changed: +105 additions, -43 deletions


csep/core/binomial_evaluations.py

Lines changed: 24 additions & 12 deletions
@@ -5,6 +5,7 @@
 from csep.models import EvaluationResult
 from csep.core.exceptions import CSEPCatalogException
 
+
 def _nbd_number_test_ndarray(fore_cnt, obs_cnt, variance, epsilon=1e-6):
     """ Computes delta1 and delta2 values from the Negative Binomial (NBD) number test.
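
Note: the hunk above only touches whitespace around _nbd_number_test_ndarray. For context, a minimal sketch of the idea behind the NBD number test (an illustration, not the actual pycsep body): map the forecast mean and variance onto scipy's negative binomial parameterization and evaluate both tails at the observed count.

    import scipy.stats

    def nbd_number_test_sketch(fore_cnt, obs_cnt, variance, epsilon=1e-6):
        # Hypothetical helper: for an NBD with mean m and variance v > m,
        # scipy's nbinom(n, p) parameters are n = m**2 / (v - m), p = m / v.
        n = fore_cnt ** 2 / (variance - fore_cnt)
        p = fore_cnt / variance
        delta1 = 1.0 - scipy.stats.nbinom.cdf(obs_cnt - epsilon, n, p)  # P(X >= obs)
        delta2 = scipy.stats.nbinom.cdf(obs_cnt + epsilon, n, p)        # P(X <= obs)
        return delta1, delta2
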
@@ -99,7 +100,8 @@ def binary_joint_log_likelihood_ndarray(forecast, catalog):
     # Finally, we sum both terms to compute the joint log-likelihood score:
     return sum(first_term.data + second_term.data)
 
-
+
+
 def _simulate_catalog(sim_cells, sampling_weights, sim_fore, random_numbers=None):
     # Modified this code to generate simulations in a way that every cell gets one earthquake
     # Generate uniformly distributed random numbers in [0,1), this
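
Note: the .data access in the return statement suggests masked arrays. A simplified sketch of the two-term structure the comment refers to (illustrative, not the actual function body), treating each cell as a Bernoulli trial with P(at least one event) = 1 - exp(-rate):

    import numpy

    def binary_joint_ll_sketch(forecast_rates, observed_counts):
        # Empty cells contribute log P(no event) = -rate; occupied cells
        # contribute log P(at least one event) = log(1 - exp(-rate)).
        occupied = observed_counts > 0
        first_term = -numpy.sum(forecast_rates[~occupied])
        second_term = numpy.sum(numpy.log(1.0 - numpy.exp(-forecast_rates[occupied])))
        return first_term + second_term
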
@@ -164,7 +166,7 @@ def _binary_likelihood_test(forecast_data, observed_data, num_simulations=1000,
         else:
             sim_fore = _simulate_catalog(num_cells_to_simulate, sampling_weights, sim_fore,
                                          random_numbers=random_numbers[idx,:])
-
+
         # compute joint log-likelihood
         current_ll = binary_joint_log_likelihood_ndarray(forecast_data.data, sim_fore)
 

@@ -208,12 +210,16 @@ def binary_spatial_test(gridded_forecast, observed_catalog, num_simulations=1000
     gridded_catalog_data = observed_catalog.spatial_counts()
 
     # simply call likelihood test on catalog data and forecast
-    qs, obs_ll, simulated_ll = _binary_likelihood_test(gridded_forecast.spatial_counts(), gridded_catalog_data,
-                                                       num_simulations=num_simulations,
-                                                       seed=seed,
-                                                       random_numbers=random_numbers,
-                                                       use_observed_counts=True,
-                                                       verbose=verbose, normalize_likelihood=True)
+    qs, obs_ll, simulated_ll = _binary_likelihood_test(
+        gridded_forecast.spatial_counts(),
+        gridded_catalog_data,
+        num_simulations=num_simulations,
+        seed=seed,
+        random_numbers=random_numbers,
+        use_observed_counts=True,
+        verbose=verbose,
+        normalize_likelihood=True
+    )
 
 
     # populate result data structure
@@ -261,10 +267,16 @@ def binary_conditional_likelihood_test(gridded_forecast, observed_catalog, num_s
     gridded_catalog_data = observed_catalog.spatial_magnitude_counts()
 
     # simply call likelihood test on catalog data and forecast
-    qs, obs_ll, simulated_ll = _binary_likelihood_test(gridded_forecast.data, gridded_catalog_data,
-                                                       num_simulations=num_simulations, seed=seed, random_numbers=random_numbers,
-                                                       use_observed_counts=True,
-                                                       verbose=verbose, normalize_likelihood=False)
+    qs, obs_ll, simulated_ll = _binary_likelihood_test(
+        gridded_forecast.data,
+        gridded_catalog_data,
+        num_simulations=num_simulations,
+        seed=seed,
+        random_numbers=random_numbers,
+        use_observed_counts=True,
+        verbose=verbose,
+        normalize_likelihood=False
+    )
 
     # populate result data structure
     result = EvaluationResult()
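
For orientation, a hedged usage sketch of the two public tests touched above (assumes `forecast` is a GriddedForecast and `catalog` a catalog filtered to its region and magnitude range; the quantile attribute on EvaluationResult is the usual way to read the score):

    from csep.core.binomial_evaluations import (binary_spatial_test,
                                                binary_conditional_likelihood_test)

    # `forecast` and `catalog` are assumed to be loaded elsewhere.
    s_result = binary_spatial_test(forecast, catalog, num_simulations=1000, seed=42)
    cl_result = binary_conditional_likelihood_test(forecast, catalog,
                                                   num_simulations=1000, seed=42)
    print(s_result.quantile, cl_result.quantile)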

csep/core/catalogs.py

Lines changed: 11 additions & 6 deletions
@@ -737,16 +737,16 @@ def spatial_magnitude_counts(self, mag_bins=None, tol=0.00001):
         """ Return counts of events in space-magnitude region.
 
         We figure out the index of the polygons and create a map that relates the spatial coordinate in the
-        Cartesian grid with with the polygon in region.
+        Cartesian grid with the polygon in region.
 
         Args:
-            mag_bins: magnitude bins (optional). tries to use magnitue bins associated with region
+            mag_bins (list, numpy.array): magnitude bins (optional), if empty tries to use magnitude bins associated with region
+            tol (float): tolerance for comparisons within magnitude bins
 
         Returns:
             output: unnormalized event count in each bin, 1d ndarray where index corresponds to midpoints
 
         """
-
         # make sure region is specified with catalog
         if self.region is None:
             raise CSEPCatalogException("Cannot create binned rates without region information.")
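
A hedged usage sketch of the documented signature (assumes `catalog` is a CSEPCatalog with a region attached; the bin edges are illustrative):

    import numpy

    mag_bins = numpy.arange(4.95, 8.95, 0.1)  # illustrative magnitude bin edges
    counts = catalog.spatial_magnitude_counts(mag_bins=mag_bins, tol=1e-5)
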
@@ -784,8 +784,8 @@ def get_bvalue(self, mag_bins=None, return_error=True):
         If that fails, uses the default magnitude bins provided in constants.
 
         Args:
-            reterr (bool): returns errors
             mag_bins (list or array_like): monotonically increasing set of magnitude bin edges
+            return_error (bool): returns errors
 
         Returns:
             bval (float): b-value
@@ -824,6 +824,10 @@ def p():
         else:
             return bval
 
+    def b_positive(self):
+        """ Implements the b-positive indicator from Nicholas van der Elst """
+        pass
+
     def plot(self, ax=None, show=False, extent=None, set_global=False, plot_args=None):
         """ Plot catalog according to plate-carree projection
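
The new b_positive method is only a stub (it passes). For orientation, a minimal sketch of the estimator the docstring names (van der Elst, 2021), under the usual continuous Aki-style approximation; the published estimator includes discretization corrections this sketch omits:

    import numpy

    def b_positive_sketch(magnitudes, dmc=0.1):
        # Use only positive magnitude differences between successive events,
        # which are comparatively insensitive to short-term incompleteness.
        diffs = numpy.diff(magnitudes)  # magnitudes ordered in time
        pos = diffs[diffs >= dmc]
        # Aki-style maximum-likelihood estimate on differences above the cutoff
        return 1.0 / (numpy.log(10.0) * (numpy.mean(pos) - dmc))
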
@@ -1028,9 +1032,10 @@ def read_catalog_line(line):
             raise ValueError(
                 "catalog_id should be monotonically increasing and events should be ordered by catalog_id")
         # yield final catalog, note: since this is just loading catalogs, it has no idea how many should be there
-        yield cls(data=events, catalog_id=prev_id, **kwargs)
+        cat = cls(data=events, catalog_id=prev_id, **kwargs)
+        yield cat
 
-    if os.path.isdir(filename):
+    elif os.path.isdir(filename):
         raise NotImplementedError("reading from directory or batched files not implemented yet!")
 
     @classmethod

csep/core/forecasts.py

Lines changed: 9 additions & 3 deletions
@@ -649,7 +649,7 @@ def magnitude_counts(self):
         self.get_expected_rates()
         return self.expected_rates.magnitude_counts()
 
-    def get_event_counts(self):
+    def get_event_counts(self, verbose=True):
         """ Returns a numpy array containing the number of event counts for each catalog.
 
         Note: This function can take a while to compute if called without already iterating through a forecast that
@@ -661,7 +661,13 @@ def get_event_counts(self):
         """
         if len(self._event_counts) == 0:
             # event counts is filled while iterating over the catalog
-            for _ in self:
+            t0 = time.time()
+            for i, _ in enumerate(self):
+                if verbose:
+                    tens_exp = numpy.floor(numpy.log10(i + 1))
+                    if (i + 1) % 10 ** tens_exp == 0:
+                        t1 = time.time()
+                        print(f'Processed {i + 1} catalogs in {t1 - t0:.2f} seconds', flush=True)
                 pass
         return numpy.array(self._event_counts)
 
@@ -697,7 +703,7 @@ def get_expected_rates(self, verbose=False):
                 tens_exp = numpy.floor(numpy.log10(i + 1))
                 if (i + 1) % 10 ** tens_exp == 0:
                     t1 = time.time()
-                    print(f'Processed {i + 1} catalogs in {t1 - t0} seconds', flush=True)
+                    print(f'Processed {i + 1} catalogs in {t1 - t0:.3f} seconds', flush=True)
         # after we iterate through the catalogs, we know self.n_cat
         data = data / self.n_cat
         self.expected_rates = GriddedForecast(self.start_time, self.end_time, data=data, region=self.region,

csep/utils/plots.py

Lines changed: 37 additions & 17 deletions
@@ -1557,7 +1557,7 @@ def plot_poisson_consistency_test(eval_results, normalize=False, one_sided_lower
     figsize= plot_args.get('figsize', None)
     title = plot_args.get('title', results[0].name)
     title_fontsize = plot_args.get('title_fontsize', None)
-    xlabel = plot_args.get('xlabel', 'X')
+    xlabel = plot_args.get('xlabel', '')
     xlabel_fontsize = plot_args.get('xlabel_fontsize', None)
     xticks_fontsize = plot_args.get('xticks_fontsize', None)
     ylabel_fontsize = plot_args.get('ylabel_fontsize', None)
@@ -1567,6 +1567,7 @@ def plot_poisson_consistency_test(eval_results, normalize=False, one_sided_lower
     hbars = plot_args.get('hbars', True)
     tight_layout = plot_args.get('tight_layout', True)
     percentile = plot_args.get('percentile', 95)
+    plot_mean = plot_args.get('mean', False)
 
     if axes is None:
         fig, ax = pyplot.subplots(figsize=figsize)
@@ -1580,6 +1581,7 @@ def plot_poisson_consistency_test(eval_results, normalize=False, one_sided_lower
         if res.test_distribution[0] == 'poisson':
             plow = scipy.stats.poisson.ppf((1 - percentile/100.)/2., res.test_distribution[1])
             phigh = scipy.stats.poisson.ppf(1 - (1 - percentile/100.)/2., res.test_distribution[1])
+            mean = res.test_distribution[1]
             observed_statistic = res.observed_statistic
         # empirical distributions
         else:
@@ -1596,12 +1598,14 @@ def plot_poisson_consistency_test(eval_results, normalize=False, one_sided_lower
             else:
                 plow = numpy.percentile(test_distribution, (100 - percentile)/2.)
                 phigh = numpy.percentile(test_distribution, 100 - (100 - percentile)/2.)
+                mean = numpy.mean(res.test_distribution)
 
         if not numpy.isinf(observed_statistic): # Check if test result does not diverges
-            low = observed_statistic - plow
-            high = phigh - observed_statistic
-            ax.errorbar(observed_statistic, index, xerr=numpy.array([[low, high]]).T,
-                        fmt=_get_marker_style(observed_statistic, (plow, phigh), one_sided_lower),
+            percentile_lims = numpy.array([[mean - plow, phigh - mean]]).T
+            ax.plot(observed_statistic, index,
+                    _get_marker_style(observed_statistic, (plow, phigh), one_sided_lower))
+            ax.errorbar(mean, index, xerr=percentile_lims,
+                        fmt='ko'*plot_mean,
                         capsize=capsize, linewidth=linewidth, ecolor=color)
             # determine the limits to use
             xlims.append((plow, phigh, observed_statistic))
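
A note on the fmt='ko'*plot_mean idiom: multiplying a format string by a bool yields 'ko' (black circle) when plot_mean is True and '' (no marker) when False, so the mean marker toggles without a branch:

    fmt_on = 'ko' * True    # -> 'ko', errorbar draws a black circle at the mean
    fmt_off = 'ko' * False  # -> '', errorbar draws caps and whiskers only
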
@@ -1887,7 +1891,7 @@ def add_labels_for_publication(figure, style='bssa', labelsize=16):
     return
 
 
-def plot_consistency_test(eval_results, normalize=False, one_sided_lower=True, plot_args=None, variance=None):
+def plot_consistency_test(eval_results, normalize=False, axes=None, one_sided_lower=False, variance=None, plot_args=None, show=False):
     """ Plots results from CSEP1 tests following the CSEP1 convention.
 
     Note: All of the evaluations should be from the same type of evaluation, otherwise the results will not be
@@ -1927,8 +1931,10 @@ def plot_consistency_test(eval_results, normalize=False, one_sided_lower=True, p
     # Parse plot arguments. More can be added here
     if plot_args is None:
         plot_args = {}
-    figsize= plot_args.get('figsize', (7,8))
-    xlabel = plot_args.get('xlabel', 'X')
+    figsize= plot_args.get('figsize', None)
+    title = plot_args.get('title', results[0].name)
+    title_fontsize = plot_args.get('title_fontsize', None)
+    xlabel = plot_args.get('xlabel', '')
     xlabel_fontsize = plot_args.get('xlabel_fontsize', None)
     xticks_fontsize = plot_args.get('xticks_fontsize', None)
     ylabel_fontsize = plot_args.get('ylabel_fontsize', None)
@@ -1938,15 +1944,22 @@ def plot_consistency_test(eval_results, normalize=False, one_sided_lower=True, p
     hbars = plot_args.get('hbars', True)
     tight_layout = plot_args.get('tight_layout', True)
     percentile = plot_args.get('percentile', 95)
+    plot_mean = plot_args.get('mean', False)
+
+    if axes is None:
+        fig, ax = pyplot.subplots(figsize=figsize)
+    else:
+        ax = axes
+        fig = ax.get_figure()
 
-    fig, ax = pyplot.subplots(figsize=figsize)
     xlims = []
 
     for index, res in enumerate(results):
         # handle analytical distributions first, they are all in the form ['name', parameters].
         if res.test_distribution[0] == 'poisson':
             plow = scipy.stats.poisson.ppf((1 - percentile/100.)/2., res.test_distribution[1])
             phigh = scipy.stats.poisson.ppf(1 - (1 - percentile/100.)/2., res.test_distribution[1])
+            mean = res.test_distribution[1]
             observed_statistic = res.observed_statistic
 
         elif res.test_distribution[0] == 'negative_binomial':
@@ -1973,13 +1986,15 @@ def plot_consistency_test(eval_results, normalize=False, one_sided_lower=True, p
             else:
                 plow = numpy.percentile(test_distribution, 2.5)
                 phigh = numpy.percentile(test_distribution, 97.5)
+                mean = numpy.mean(res.test_distribution)
 
         if not numpy.isinf(observed_statistic): # Check if test result does not diverges
-            low = observed_statistic - plow
-            high = phigh - observed_statistic
-            ax.errorbar(observed_statistic, index, xerr=numpy.array([[low, high]]).T,
-                        fmt=_get_marker_style(observed_statistic, (plow, phigh), one_sided_lower),
-                        capsize=4, linewidth=linewidth, ecolor=color, markersize = 10, zorder=1)
+            percentile_lims = numpy.array([[mean - plow, phigh - mean]]).T
+            ax.plot(observed_statistic, index,
+                    _get_marker_style(observed_statistic, (plow, phigh), one_sided_lower))
+            ax.errorbar(mean, index, xerr=percentile_lims,
+                        fmt='ko'*plot_mean,
+                        capsize=capsize, linewidth=linewidth, ecolor=color)
             # determine the limits to use
             xlims.append((plow, phigh, observed_statistic))
             # we want to only extent the distribution where it falls outside of it in the acceptable tail
@@ -2001,18 +2016,23 @@ def plot_consistency_test(eval_results, normalize=False, one_sided_lower=True, p
     except ValueError:
         raise ValueError('All EvaluationResults have infinite observed_statistics')
     ax.set_yticks(numpy.arange(len(results)))
-    ax.set_yticklabels([res.sim_name for res in results], fontsize=14)
+    ax.set_yticklabels([res.sim_name for res in results], fontsize=ylabel_fontsize)
     ax.set_ylim([-0.5, len(results)-0.5])
     if hbars:
         yTickPos = ax.get_yticks()
         if len(yTickPos) >= 2:
             ax.barh(yTickPos, numpy.array([99999] * len(yTickPos)), left=-10000,
                     height=(yTickPos[1] - yTickPos[0]), color=['w', 'gray'], alpha=0.2, zorder=0)
-    ax.set_xlabel(xlabel, fontsize=14)
-    ax.tick_params(axis='x', labelsize=13)
+    ax.set_title(title, fontsize=title_fontsize)
+    ax.set_xlabel(xlabel, fontsize=xlabel_fontsize)
+    ax.tick_params(axis='x', labelsize=xticks_fontsize)
     if tight_layout:
         ax.figure.tight_layout()
         fig.tight_layout()
+
+    if show:
+        pyplot.show()
+
     return ax
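
A hedged usage sketch of the updated signature (assumes `n_results` and `s_results` are lists of EvaluationResult objects, each list from a single test type):

    import matplotlib.pyplot as pyplot
    from csep.utils.plots import plot_consistency_test

    fig, (ax0, ax1) = pyplot.subplots(1, 2, figsize=(10, 4))
    plot_consistency_test(n_results, axes=ax0,
                          plot_args={'title': 'N-test', 'mean': True})
    plot_consistency_test(s_results, axes=ax1, one_sided_lower=True,
                          plot_args={'title': 'S-test'})
    pyplot.show()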


requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 numpy
 scipy
 pandas
-matplotlib<=3.5.3
+matplotlib
 cartopy
 obspy
 pyproj

requirements.yml

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@ dependencies:
   - numpy
   - pandas
   - scipy
-  - matplotlib<=3.5.3
+  - matplotlib
   - pyproj
   - obspy
   - python-dateutil

setup.py

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ def get_version():
         'numpy',
         'scipy',
         'pandas',
-        'matplotlib<=3.5.3',
+        'matplotlib',
         'cartopy',
         'obspy',
         'pyproj',

tests/test_evaluations.py

Lines changed: 3 additions & 1 deletion
@@ -11,6 +11,7 @@ def get_datadir():
     data_dir = os.path.join(root_dir, 'artifacts', 'Comcat')
     return data_dir
 
+
 class TestPoissonLikelihood(unittest.TestCase):
 
     def __init__(self, *args, **kwargs):
@@ -85,5 +86,6 @@ def test_joint_likelihood_calculation(self):
 
     numpy.testing.assert_allclose(bill, -6.7197988064)
 
+
 if __name__ == '__main__':
-    unittest.main()
+    unittest.main()

tests/test_forecast.py

Lines changed: 18 additions & 1 deletion
@@ -1,14 +1,24 @@
-import os, unittest
+import os
+import unittest
 import numpy
 from csep import load_catalog_forecast
 
+
 def get_test_catalog_root():
     root_dir = os.path.dirname(os.path.abspath(__file__))
     data_dir = os.path.join(root_dir, 'artifacts', 'test_ascii_catalogs')
     return data_dir
 
+
 class TestCatalogForecastCreation(unittest.TestCase):
 
+    def test_all_present(self):
+        fname = os.path.join(get_test_catalog_root(), 'all_present.csv')
+        test_fore = load_catalog_forecast(fname)
+        total_event_count = numpy.array([cat.event_count for cat in test_fore]).sum()
+        self.assertEqual(10, test_fore.n_cat)
+        self.assertEqual(10, total_event_count)
+
     def test_ascii_load_all_empty(self):
         fname = os.path.join(get_test_catalog_root(), 'all_empty.csv')
         test_fore = load_catalog_forecast(fname)
@@ -56,5 +66,12 @@ def test_get_event_counts(self):
         test_fore = load_catalog_forecast(fname)
         numpy.testing.assert_array_equal(numpy.ones(10), test_fore.get_event_counts())
 
+    def test_multiple_iterations(self):
+        fname = os.path.join(get_test_catalog_root(), 'all_present.csv')
+        test_fore = load_catalog_forecast(fname)
+        ec1 = [cat.event_count for cat in test_fore]
+        ec2 = [cat.event_count for cat in test_fore]
+        numpy.testing.assert_array_equal(ec1, ec2)
+
 if __name__ == '__main__':
     unittest.main()
