 import numpy
 import scipy.stats
 import scipy.spatial
+import warnings

 from csep.models import EvaluationResult
 from csep.utils.stats import poisson_joint_log_likelihood_ndarray
 from csep.core.exceptions import CSEPCatalogException
-from csep.core.regions import QuadtreeGrid2D


-def paired_t_test(forecast, benchmark_forecast, observed_catalog, alpha=0.05, scale=False):
+def paired_t_test(forecast, benchmark_forecast, observed_catalog,
+                  alpha=0.05, scale=False):
     """ Computes the t-test for gridded earthquake forecasts.

     This score is positively oriented, meaning that positive values of the information gain indicate that the
@@ -30,11 +31,15 @@ def paired_t_test(forecast, benchmark_forecast, observed_catalog, alpha=0.05, sc

     # needs some pre-processing to put the forecasts in the context that is required for the t-test. this is different
     # for cumulative forecasts (eg, multiple time-horizons) and static file-based forecasts.
-    target_event_rate_forecast1, n_fore1 = forecast.target_event_rates(observed_catalog, scale=scale)
-    target_event_rate_forecast2, n_fore2 = benchmark_forecast.target_event_rates(observed_catalog, scale=scale)
+    target_event_rate_forecast1, n_fore1 = forecast.target_event_rates(
+        observed_catalog, scale=scale)
+    target_event_rate_forecast2, n_fore2 = benchmark_forecast.target_event_rates(
+        observed_catalog, scale=scale)

     # call the primative version operating on ndarray
-    out = _t_test_ndarray(target_event_rate_forecast1, target_event_rate_forecast2, observed_catalog.event_count,
+    out = _t_test_ndarray(target_event_rate_forecast1,
+                          target_event_rate_forecast2,
+                          observed_catalog.event_count,
                           n_fore1, n_fore2, alpha=alpha)

     # storing this for later
@@ -49,7 +54,9 @@ def paired_t_test(forecast, benchmark_forecast, observed_catalog, alpha=0.05, sc
     result.min_mw = numpy.min(forecast.magnitudes)
     return result

-def w_test(gridded_forecast1, gridded_forecast2, observed_catalog, scale=False):
+
+def w_test(gridded_forecast1, gridded_forecast2, observed_catalog,
+           scale=False):
     """ Calculate the Single Sample Wilcoxon signed-rank test between two gridded forecasts.

     This test allows to test the null hypothesis that the median of Sample (X1(i)-X2(i)) is equal to a (N1-N2) / N_obs.
@@ -79,14 +86,18 @@ def w_test(gridded_forecast1, gridded_forecast2, observed_catalog, scale=False):

     # needs some pre-processing to put the forecasts in the context that is required for the t-test. this is different
     # for cumulative forecasts (eg, multiple time-horizons) and static file-based forecasts.
-    target_event_rate_forecast1, _ = gridded_forecast1.target_event_rates(observed_catalog, scale=scale)
-    target_event_rate_forecast2, _ = gridded_forecast2.target_event_rates(observed_catalog, scale=scale)
+    target_event_rate_forecast1, _ = gridded_forecast1.target_event_rates(
+        observed_catalog, scale=scale)
+    target_event_rate_forecast2, _ = gridded_forecast2.target_event_rates(
+        observed_catalog, scale=scale)

     N = observed_catalog.event_count  # Sum of all the observed earthquakes
     N1 = gridded_forecast1.event_count  # Total number of Forecasted earthquakes by Model 1
     N2 = gridded_forecast2.event_count  # Total number of Forecasted earthquakes by Model 2
-    X1 = numpy.log(target_event_rate_forecast1)  # Log of every element of Forecast 1
-    X2 = numpy.log(target_event_rate_forecast2)  # Log of every element of Forecast 2
+    X1 = numpy.log(
+        target_event_rate_forecast1)  # Log of every element of Forecast 1
+    X2 = numpy.log(
+        target_event_rate_forecast2)  # Log of every element of Forecast 2

     # this ratio is the same as long as we scale all the forecasts and catalog rates by the same value
     median_value = (N1 - N2) / N
@@ -110,6 +121,7 @@ def w_test(gridded_forecast1, gridded_forecast2, observed_catalog, scale=False):
     result.min_mw = numpy.min(gridded_forecast1.magnitudes)
     return result

+
 def number_test(gridded_forecast, observed_catalog):
     """Computes "N-Test" on a gridded forecast.
     author: @asim
@@ -155,7 +167,9 @@ def number_test(gridded_forecast, observed_catalog):

     return result

-def conditional_likelihood_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None,
+
+def conditional_likelihood_test(gridded_forecast, observed_catalog,
+                                num_simulations=1000, seed=None,
                                 random_numbers=None, verbose=False):
     """Performs the conditional likelihood test on Gridded Forecast using an Observed Catalog.

@@ -186,7 +200,8 @@ def conditional_likelihood_test(gridded_forecast, observed_catalog, num_simulati
     gridded_catalog_data = observed_catalog.spatial_magnitude_counts()

     # simply call likelihood test on catalog data and forecast
-    qs, obs_ll, simulated_ll = _poisson_likelihood_test(gridded_forecast.data, gridded_catalog_data,
+    qs, obs_ll, simulated_ll = _poisson_likelihood_test(gridded_forecast.data,
+                                                        gridded_catalog_data,
                                                         num_simulations=num_simulations,
                                                         seed=seed,
                                                         random_numbers=random_numbers,
@@ -207,6 +222,7 @@ def conditional_likelihood_test(gridded_forecast, observed_catalog, num_simulati

     return result

+
 def poisson_spatial_likelihood(forecast, catalog):
     """
     This function computes the observed log-likehood score obtained by a gridded forecast in each cell, given a
@@ -227,15 +243,17 @@ def poisson_spatial_likelihood(forecast, catalog):
     """

     scale = catalog.event_count / forecast.event_count
-
+
     first_term = -forecast.spatial_counts() * scale
-    second_term = catalog.spatial_counts() * numpy.log(forecast.spatial_counts() * scale)
+    second_term = catalog.spatial_counts() * numpy.log(
+        forecast.spatial_counts() * scale)
     third_term = -scipy.special.loggamma(catalog.spatial_counts() + 1)
-
+
     poll = first_term + second_term + third_term
-
+
     return poll

+
 def binary_spatial_likelihood(forecast, catalog):
     """
     This function computes log-likelihood scores (bills), using a binary likelihood distribution of earthquakes.
@@ -253,24 +271,27 @@ def binary_spatial_likelihood(forecast, catalog):
     Returns:
         bill: Binary-based log-likelihood scores obtained by the forecast in each spatial cell.
     """
-
+
     scale = catalog.event_count / forecast.event_count
     target_idx = numpy.nonzero(catalog.spatial_counts())
     X = numpy.zeros(forecast.spatial_counts().shape)
     X[target_idx[0]] = 1
-
+
     # First, we estimate the log-likelihood in cells where no events are observed:
-    first_term = (1 - X) * (-forecast.spatial_counts() * scale)
-
+    first_term = (1 - X) * (-forecast.spatial_counts() * scale)
+
     # Then, we compute the log-likelihood of observing one or more events given a Poisson distribution, i.e., 1 - Pr(0):
-    second_term = X * (numpy.log(1.0 - numpy.exp(-forecast.spatial_counts() * scale)))
-
+    second_term = X * (
+        numpy.log(1.0 - numpy.exp(-forecast.spatial_counts() * scale)))
+
     # Finally, we sum both terms to compute log-likelihood score in each spatial cell:
     bill = first_term + second_term
-
+
     return bill

-def magnitude_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None, random_numbers=None,
+
+def magnitude_test(gridded_forecast, observed_catalog, num_simulations=1000,
+                   seed=None, random_numbers=None,
                    verbose=False):
     """
     Performs the Magnitude Test on a Gridded Forecast using an observed catalog.
@@ -291,16 +312,18 @@ def magnitude_test(gridded_forecast, observed_catalog, num_simulations=1000, see
     """

     # grid catalog onto spatial grid
-    gridded_catalog_data = observed_catalog.magnitude_counts(mag_bins=gridded_forecast.magnitudes)
+    gridded_catalog_data = observed_catalog.magnitude_counts(
+        mag_bins=gridded_forecast.magnitudes)

     # simply call likelihood test on catalog data and forecast
-    qs, obs_ll, simulated_ll = _poisson_likelihood_test(gridded_forecast.magnitude_counts(), gridded_catalog_data,
-                                                        num_simulations=num_simulations,
-                                                        seed=seed,
-                                                        random_numbers=random_numbers,
-                                                        use_observed_counts=True,
-                                                        verbose=verbose,
-                                                        normalize_likelihood=True)
+    qs, obs_ll, simulated_ll = _poisson_likelihood_test(
+        gridded_forecast.magnitude_counts(), gridded_catalog_data,
+        num_simulations=num_simulations,
+        seed=seed,
+        random_numbers=random_numbers,
+        use_observed_counts=True,
+        verbose=verbose,
+        normalize_likelihood=True)

     # populate result data structure
     result = EvaluationResult()
@@ -315,7 +338,9 @@ def magnitude_test(gridded_forecast, observed_catalog, num_simulations=1000, see

     return result

-def spatial_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None, random_numbers=None,
+
+def spatial_test(gridded_forecast, observed_catalog, num_simulations=1000,
+                 seed=None, random_numbers=None,
                  verbose=False):
     """
     Performs the Spatial Test on the Forecast using the Observed Catalogs.
@@ -338,13 +363,14 @@ def spatial_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=
     gridded_catalog_data = observed_catalog.spatial_counts()

     # simply call likelihood test on catalog data and forecast
-    qs, obs_ll, simulated_ll = _poisson_likelihood_test(gridded_forecast.spatial_counts(), gridded_catalog_data,
-                                                        num_simulations=num_simulations,
-                                                        seed=seed,
-                                                        random_numbers=random_numbers,
-                                                        use_observed_counts=True,
-                                                        verbose=verbose,
-                                                        normalize_likelihood=True)
+    qs, obs_ll, simulated_ll = _poisson_likelihood_test(
+        gridded_forecast.spatial_counts(), gridded_catalog_data,
+        num_simulations=num_simulations,
+        seed=seed,
+        random_numbers=random_numbers,
+        use_observed_counts=True,
+        verbose=verbose,
+        normalize_likelihood=True)

     # populate result data structure
     result = EvaluationResult()
@@ -361,7 +387,9 @@ def spatial_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=
     result.min_mw = -1
     return result

-def likelihood_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None, random_numbers=None,
+
+def likelihood_test(gridded_forecast, observed_catalog, num_simulations=1000,
+                    seed=None, random_numbers=None,
                     verbose=False):
     """
     Performs the likelihood test on Gridded Forecast using an Observed Catalog.
@@ -392,7 +420,8 @@ def likelihood_test(gridded_forecast, observed_catalog, num_simulations=1000, se
     gridded_catalog_data = observed_catalog.spatial_magnitude_counts()

     # simply call likelihood test on catalog and forecast
-    qs, obs_ll, simulated_ll = _poisson_likelihood_test(gridded_forecast.data, gridded_catalog_data,
+    qs, obs_ll, simulated_ll = _poisson_likelihood_test(gridded_forecast.data,
+                                                        gridded_catalog_data,
                                                         num_simulations=num_simulations,
                                                         seed=seed,
                                                         random_numbers=random_numbers,
@@ -413,6 +442,7 @@ def likelihood_test(gridded_forecast, observed_catalog, num_simulations=1000, se

     return result

+
 def _number_test_ndarray(fore_cnt, obs_cnt, epsilon=1e-6):
     """ Computes delta1 and delta2 values from the csep1 number test.

@@ -428,7 +458,9 @@ def _number_test_ndarray(fore_cnt, obs_cnt, epsilon=1e-6):
     delta2 = scipy.stats.poisson.cdf(obs_cnt + epsilon, fore_cnt)
     return delta1, delta2

-def _t_test_ndarray(target_event_rates1, target_event_rates2, n_obs, n_f1, n_f2, alpha=0.05):
+
+def _t_test_ndarray(target_event_rates1, target_event_rates2, n_obs, n_f1,
+                    n_f2, alpha=0.05):
     """ Computes T test statistic by comparing two target event rate distributions.

     We compare Forecast from Model 1 and with Forecast of Model 2. Information Gain is computed, which is then employed
@@ -466,7 +498,8 @@ def _t_test_ndarray(target_event_rates1, target_event_rates2, n_obs, n_f1, n_f2,

     # Obtaining the Critical Value of T from T distribution.
     df = N - 1
-    t_critical = scipy.stats.t.ppf(1 - (alpha / 2), df)  # Assuming 2-Tail Distribution for 2 tail, divide 0.05/2.
+    t_critical = scipy.stats.t.ppf(1 - (alpha / 2),
+                                   df)  # Assuming 2-Tail Distribution for 2 tail, divide 0.05/2.

     # Computing Information Gain Interval.
     ig_lower = information_gain - (t_critical * forecast_std / numpy.sqrt(N))
@@ -480,6 +513,7 @@ def _t_test_ndarray(target_event_rates1, target_event_rates2, n_obs, n_f1, n_f2,
             'ig_lower': ig_lower,
             'ig_upper': ig_upper}

+
 def _w_test_ndarray(x, m=0):
     """ Calculate the Single Sample Wilcoxon signed-rank test for an ndarray.

@@ -507,7 +541,7 @@ def _w_test_ndarray(x, m=0):

     count = len(d)
     if count < 10:
-        numpy.warnings.warn("Sample size too small for normal approximation.")
+        warnings.warn("Sample size too small for normal approximation.")

     # compute ranks
     r = scipy.stats.rankdata(abs(d))
@@ -542,7 +576,9 @@ def _w_test_ndarray(x, m=0):

     return w_test_eval

-def _simulate_catalog(num_events, sampling_weights, sim_fore, random_numbers=None):
+
+def _simulate_catalog(num_events, sampling_weights, sim_fore,
+                      random_numbers=None):
     # generate uniformly distributed random numbers in [0,1), this
     if random_numbers is None:
         random_numbers = numpy.random.rand(num_events)
@@ -562,8 +598,11 @@ def _simulate_catalog(num_events, sampling_weights, sim_fore, random_numbers=Non

     return sim_fore

-def _poisson_likelihood_test(forecast_data, observed_data, num_simulations=1000, random_numbers=None,
-                             seed=None, use_observed_counts=True, verbose=True, normalize_likelihood=False):
+
+def _poisson_likelihood_test(forecast_data, observed_data,
+                             num_simulations=1000, random_numbers=None,
+                             seed=None, use_observed_counts=True, verbose=True,
+                             normalize_likelihood=False):
     """
     Computes the likelihood-test from CSEP using an efficient simulation based approach.
     Args:
@@ -582,7 +621,8 @@ def _poisson_likelihood_test(forecast_data, observed_data, num_simulations=1000,
         numpy.random.seed(seed)

     # used to determine where simulated earthquake should be placed, by definition of cumsum these are sorted
-    sampling_weights = numpy.cumsum(forecast_data.ravel()) / numpy.sum(forecast_data)
+    sampling_weights = numpy.cumsum(forecast_data.ravel()) / numpy.sum(
+        forecast_data)

     # data structures to store results
     sim_fore = numpy.zeros(sampling_weights.shape)
@@ -605,29 +645,35 @@ def _poisson_likelihood_test(forecast_data, observed_data, num_simulations=1000,

     # note for performance: these operations perform copies
     observed_data_nonzero = observed_data.ravel()[target_idx]
-    target_event_forecast = log_bin_expectations[target_idx] * observed_data_nonzero
+    target_event_forecast = log_bin_expectations[
+        target_idx] * observed_data_nonzero

     # main simulation step in this loop
     for idx in range(num_simulations):
         if use_observed_counts:
             num_events_to_simulate = int(n_obs)
         else:
-            num_events_to_simulate = int(numpy.random.poisson(expected_forecast_count))
+            num_events_to_simulate = int(
+                numpy.random.poisson(expected_forecast_count))

         if random_numbers is None:
-            sim_fore = _simulate_catalog(num_events_to_simulate, sampling_weights, sim_fore)
+            sim_fore = _simulate_catalog(num_events_to_simulate,
+                                         sampling_weights, sim_fore)
         else:
-            sim_fore = _simulate_catalog(num_events_to_simulate, sampling_weights, sim_fore,
+            sim_fore = _simulate_catalog(num_events_to_simulate,
+                                         sampling_weights, sim_fore,
                                          random_numbers=random_numbers[idx, :])

         # compute joint log-likelihood from simulation by leveraging that only cells with target events contribute to likelihood
         sim_target_idx = numpy.nonzero(sim_fore)
         sim_obs_nonzero = sim_fore[sim_target_idx]
-        sim_target_event_forecast = log_bin_expectations[sim_target_idx] * sim_obs_nonzero
+        sim_target_event_forecast = log_bin_expectations[
+            sim_target_idx] * sim_obs_nonzero

         # compute joint log-likelihood
-        current_ll = poisson_joint_log_likelihood_ndarray(sim_target_event_forecast, sim_obs_nonzero,
-                                                          expected_forecast_count)
+        current_ll = poisson_joint_log_likelihood_ndarray(
+            sim_target_event_forecast, sim_obs_nonzero,
+            expected_forecast_count)

         # append to list of simulated log-likelihoods
         simulated_ll.append(current_ll)
@@ -638,7 +684,9 @@ def _poisson_likelihood_test(forecast_data, observed_data, num_simulations=1000,
             print(f'... {idx + 1} catalogs simulated.')

     # observed joint log-likelihood
-    obs_ll = poisson_joint_log_likelihood_ndarray(target_event_forecast, observed_data_nonzero, expected_forecast_count)
+    obs_ll = poisson_joint_log_likelihood_ndarray(target_event_forecast,
+                                                  observed_data_nonzero,
+                                                  expected_forecast_count)

     # quantile score
     qs = numpy.sum(simulated_ll <= obs_ll) / num_simulations
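
For context, a minimal usage sketch of the evaluation functions touched by this reformatting commit. The loader calls (csep.load_gridded_forecast, csep.load_catalog), the poisson_evaluations module path, and the file names below are assumptions drawn from the public pyCSEP API, not part of this change.

# Minimal sketch (assumed pyCSEP API; file paths are placeholders).
import csep
from csep.core import poisson_evaluations as poisson

forecast = csep.load_gridded_forecast('model_a.dat')    # hypothetical forecast file
benchmark = csep.load_gridded_forecast('model_b.dat')   # hypothetical forecast file
catalog = csep.load_catalog('observed_catalog.csv')     # hypothetical catalog file

# Comparative tests between the two gridded forecasts.
t_result = poisson.paired_t_test(forecast, benchmark, catalog, alpha=0.05)
w_result = poisson.w_test(forecast, benchmark, catalog)

# Consistency tests of a single forecast against the observed catalog.
n_result = poisson.number_test(forecast, catalog)
s_result = poisson.spatial_test(forecast, catalog, num_simulations=1000, seed=0)
m_result = poisson.magnitude_test(forecast, catalog, num_simulations=1000, seed=0)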