Commit 3f29b45

committed
Added non-Poissonian comparative and consistency tests.
1 parent 19c84f3 commit 3f29b45

1 file changed: +381 −0 lines changed

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import numpy
import scipy.stats
import scipy.spatial

from csep.models import EvaluationResult
from csep.utils.stats import poisson_joint_log_likelihood_ndarray
from csep.core.exceptions import CSEPCatalogException
from csep.core.regions import QuadtreeGrid2D
# _simulate_catalog is shared with the Poisson evaluations; it is required by
# _binary_likelihood_test below.
from csep.core.poisson_evaluations import _simulate_catalog


def _nbd_number_test_ndarray(fore_cnt, obs_cnt, variance, epsilon=1e-6):
    """
    Computes delta1 and delta2 values from the Negative Binomial (NBD) number test.

    Args:
        fore_cnt (float): parameter of negative binomial distribution coming from expected value of the forecast
        obs_cnt (float): count of earthquakes observed during the testing period.
        variance (float): variance parameter of negative binomial distribution coming from historical catalog.
            A variance value of approximately 23541 has been calculated using M5.95+ earthquakes observed worldwide from 1982 to 2013.
        epsilon (float): tolerance level to satisfy the requirements of two-sided p-value

    Returns:
        result (tuple): (delta1, delta2)
    """
    var = variance
    mean = fore_cnt
    upsilon = 1.0 - ((var - mean) / var)
    tau = (mean**2 / (var - mean))

    delta1 = 1.0 - scipy.stats.nbinom.cdf(obs_cnt - epsilon, tau, upsilon, loc=0)
    delta2 = scipy.stats.nbinom.cdf(obs_cnt + epsilon, tau, upsilon, loc=0)

    return delta1, delta2

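# --- Illustrative sketch (not part of the original commit) -----------------
# A quick sanity check of the helper under assumed inputs: with the global
# variance of ~23541 quoted above and a forecast of 15 events, observing 12
# events should yield two valid tail probabilities.
def _demo_nbd_number_test():
    delta1, delta2 = _nbd_number_test_ndarray(fore_cnt=15.0, obs_cnt=12, variance=23541.0)
    assert 0.0 <= delta1 <= 1.0 and 0.0 <= delta2 <= 1.0
    return delta1, delta2
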
def negative_binomial_number_test(gridded_forecast, observed_catalog, variance):
    """
    Computes the "negative binomial N-test" on a gridded forecast.

    Computes the number (N) test for observed and forecasted catalogs. Both data sets are expected to be in terms of event counts.
    We find the total number of events in the observed catalog and the forecast, which are then employed to compute the
    probabilities of
    (i) at least the observed number of events (delta 1), and
    (ii) at most the observed number of events (delta 2), assuming the negative binomial distribution.

    Args:
        gridded_forecast: Forecast of a model (gridded) (numpy array).
            A forecast has to be in terms of the average number of events in each bin.
            It can be any value greater than zero.
        observed_catalog: Observed (gridded) seismicity (numpy array).
            An observation has to be the number of events in each bin.
            It has to be either zero or a positive integer (no floating point).
        variance: Variance parameter of the negative binomial distribution obtained from a historical catalog.

    Returns:
        result (csep.models.EvaluationResult): evaluation result whose quantile attribute holds (delta1, delta2)
    """
    result = EvaluationResult()

    # observed count
    obs_cnt = observed_catalog.event_count

    # forecasts provide the expected number of events during the time horizon of the forecast
    fore_cnt = gridded_forecast.event_count

    epsilon = 1e-6

    # stores the actual result of the number test
    delta1, delta2 = _nbd_number_test_ndarray(fore_cnt, obs_cnt, variance, epsilon=epsilon)

    # store results
    result.test_distribution = ('negative_binomial', fore_cnt)
    result.name = 'NBD N-Test'
    result.observed_statistic = obs_cnt
    result.quantile = (delta1, delta2)
    result.sim_name = gridded_forecast.name
    result.obs_name = observed_catalog.name
    result.status = 'normal'
    result.min_mw = numpy.min(gridded_forecast.magnitudes)

    return result

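# Illustrative usage (loading the forecast and catalog via pyCSEP is assumed;
# the variance value repeats the global M5.95+ estimate quoted above):
#
#   result = negative_binomial_number_test(gridded_forecast, observed_catalog, variance=23541)
#   delta1, delta2 = result.quantile
#   # small delta1: more events observed than forecast; small delta2: fewer
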
def binary_joint_log_likelihood_ndarray(forecast, catalog):
    """
    Computes Bernoulli log-likelihood scores, assuming that earthquakes follow a binomial distribution.

    Args:
        forecast: Forecast of a model (gridded) (numpy array).
            A forecast has to be in terms of the average number of events in each bin.
            It can be any value greater than zero.
        catalog: Observed (gridded) seismicity (numpy array).
            An observation has to be the number of events in each bin.
            It has to be either zero or a positive integer (no floating point).
    """
    # First, we mask the forecast in cells where we could find log=0.0 singularities:
    forecast_masked = numpy.ma.masked_where(forecast.ravel() <= 0.0, forecast.ravel())

    # Then, we compute the log-likelihood of observing one or more events given a Poisson distribution, i.e., 1 - Pr(0):
    target_idx = numpy.nonzero(catalog.ravel())
    y = numpy.zeros(forecast_masked.ravel().shape)
    y[target_idx[0]] = 1
    first_term = y * (numpy.log(1.0 - numpy.exp(-forecast_masked.ravel())))

    # Also, we estimate the log-likelihood in cells where no events are observed:
    second_term = (1 - y) * (-forecast_masked.ravel().data)
    # Finally, we sum both terms to compute the joint log-likelihood score:
    return sum(first_term.data + second_term.data)

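# --- Illustrative sketch (not part of the original commit) -----------------
# A toy check of the Bernoulli likelihood on hand-picked arrays: cells with
# one or more observed events contribute log(1 - exp(-rate)); empty cells
# contribute -rate. Rates are kept strictly positive so nothing is masked.
def _demo_binary_joint_log_likelihood():
    forecast = numpy.array([0.5, 0.1, 1.5, 2.0])   # assumed per-bin rates
    catalog = numpy.array([1, 0, 0, 3])            # assumed per-bin counts
    expected = (numpy.log(1 - numpy.exp(-0.5)) + numpy.log(1 - numpy.exp(-2.0))
                - 0.1 - 1.5)
    assert numpy.isclose(binary_joint_log_likelihood_ndarray(forecast, catalog), expected)
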
def _binary_likelihood_test(forecast_data, observed_data, num_simulations=1000, random_numbers=None,
                            seed=None, use_observed_counts=True, verbose=True, normalize_likelihood=False):
    """
    Computes the binary conditional likelihood test from CSEP using an efficient simulation-based approach.

    Args:
        forecast_data (numpy.ndarray): nd array where [:, -1] are the magnitude bins.
        observed_data (numpy.ndarray): same format as forecast_data.
        num_simulations: default number of simulations to use for likelihood-based simulations
        seed: used for reproducibility of the prng
        random_numbers (numpy.ndarray): can supply an explicit list of random numbers, primarily used for software testing
        use_observed_counts (bool): if true, will simulate catalogs using the observed events; if false, will draw from a Poisson
            distribution
    """

    # array masking that avoids log singularities
    forecast_data = numpy.ma.masked_where(forecast_data <= 0.0, forecast_data)

    # set seed for the likelihood test
    if seed is not None:
        numpy.random.seed(seed)

    # used to determine where simulated earthquakes should be placed; by definition of cumsum these are sorted
    sampling_weights = numpy.cumsum(forecast_data.ravel()) / numpy.sum(forecast_data)

    # data structures to store results
    sim_fore = numpy.zeros(sampling_weights.shape)
    simulated_ll = []
    n_obs = len(numpy.unique(numpy.nonzero(observed_data.ravel())))
    n_fore = numpy.sum(forecast_data)
    expected_forecast_count = int(n_obs)

    if use_observed_counts and normalize_likelihood:
        scale = n_obs / n_fore
        expected_forecast_count = int(n_obs)
        forecast_data = scale * forecast_data

    # main simulation step in this loop
    for idx in range(num_simulations):
        if use_observed_counts:
            num_events_to_simulate = int(n_obs)
        else:
            num_events_to_simulate = int(numpy.random.poisson(expected_forecast_count))

        if random_numbers is None:
            sim_fore = _simulate_catalog(num_events_to_simulate, sampling_weights, sim_fore)
        else:
            sim_fore = _simulate_catalog(num_events_to_simulate, sampling_weights, sim_fore,
                                         random_numbers=random_numbers[idx, :])

        # compute joint log-likelihood
        current_ll = binary_joint_log_likelihood_ndarray(forecast_data.data, sim_fore)

        # append to list of simulated log-likelihoods
        simulated_ll.append(current_ll)

        # just be verbose
        if verbose:
            if (idx + 1) % 100 == 0:
                print(f'... {idx + 1} catalogs simulated.')

    # observed joint log-likelihood
    obs_ll = binary_joint_log_likelihood_ndarray(forecast_data.data, observed_data)

    # quantile score
    qs = numpy.sum(numpy.array(simulated_ll) <= obs_ll) / num_simulations

    # float, float, list
    return qs, obs_ll, simulated_ll

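# Illustrative call (argument values are assumed, mirroring the public
# wrappers below):
#
#   qs, obs_ll, sim_ll = _binary_likelihood_test(
#       gridded_forecast.spatial_counts(), observed_catalog.spatial_counts(),
#       num_simulations=1000, seed=0,
#       use_observed_counts=True, normalize_likelihood=True)
#
# A quantile score near zero means the observed likelihood falls in the lower
# tail of the simulated distribution, i.e. the forecast is inconsistent with
# the observation.
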
def binary_spatial_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None, random_numbers=None, verbose=False):
    """
    Performs the binary spatial test on the forecast using the observed catalogs.

    Note: The forecast and the observations should be scaled to the same time period before calling this function. This increases
    transparency as no assumptions are being made about the length of the forecasts. This is particularly important for
    gridded forecasts that supply their forecasts as rates.

    Args:
        gridded_forecast: csep.core.forecasts.GriddedForecast
        observed_catalog: csep.core.catalogs.Catalog
        num_simulations (int): number of simulations used to compute the quantile score
        seed (int): used for reproducibility and testing
        random_numbers (numpy.ndarray): random numbers used to override the random number generation; injection point for testing.

    Returns:
        evaluation_result: csep.core.evaluations.EvaluationResult
    """

    # grid catalog onto spatial grid
    gridded_catalog_data = observed_catalog.spatial_counts()

    # simply call likelihood test on catalog data and forecast
    qs, obs_ll, simulated_ll = _binary_likelihood_test(gridded_forecast.spatial_counts(), gridded_catalog_data,
                                                       num_simulations=num_simulations,
                                                       seed=seed,
                                                       random_numbers=random_numbers,
                                                       use_observed_counts=True,
                                                       verbose=verbose, normalize_likelihood=True)

    # populate result data structure
    result = EvaluationResult()
    result.test_distribution = simulated_ll
    result.name = 'Binary S-Test'
    result.observed_statistic = obs_ll
    result.quantile = qs
    result.sim_name = gridded_forecast.name
    result.obs_name = observed_catalog.name
    result.status = 'normal'
    try:
        result.min_mw = numpy.min(gridded_forecast.magnitudes)
    except AttributeError:
        result.min_mw = -1
    return result

def binary_conditional_likelihood_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None, random_numbers=None, verbose=False):
    """
    Performs the binary conditional likelihood test on a gridded forecast using an observed catalog.

    Normalizes the forecast so the forecasted rates are consistent with the observations. This modification
    eliminates the strong impact differences in the number distribution have on the forecasted rates.

    Note: The forecast and the observations should be scaled to the same time period before calling this function. This increases
    transparency as no assumptions are being made about the length of the forecasts. This is particularly important for
    gridded forecasts that supply their forecasts as rates.

    Args:
        gridded_forecast: csep.core.forecasts.GriddedForecast
        observed_catalog: csep.core.catalogs.Catalog
        num_simulations (int): number of simulations used to compute the quantile score
        seed (int): used for reproducibility and testing
        random_numbers (numpy.ndarray): random numbers used to override the random number generation; injection point for testing.

    Returns:
        evaluation_result: csep.core.evaluations.EvaluationResult
    """

    # grid catalog onto spatial grid
    try:
        _ = observed_catalog.region.magnitudes
    except CSEPCatalogException:
        observed_catalog.region = gridded_forecast.region
    gridded_catalog_data = observed_catalog.spatial_magnitude_counts()

    # simply call likelihood test on catalog data and forecast
    qs, obs_ll, simulated_ll = _binary_likelihood_test(gridded_forecast.data, gridded_catalog_data,
                                                       num_simulations=num_simulations, seed=seed, random_numbers=random_numbers,
                                                       use_observed_counts=True,
                                                       verbose=verbose, normalize_likelihood=False)

    # populate result data structure
    result = EvaluationResult()
    result.test_distribution = simulated_ll
    result.name = 'Binary CL-Test'
    result.observed_statistic = obs_ll
    result.quantile = qs
    result.sim_name = gridded_forecast.name
    result.obs_name = observed_catalog.name
    result.status = 'normal'
    result.min_mw = numpy.min(gridded_forecast.magnitudes)

    return result

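# Illustrative usage of the two consistency tests (object construction via
# pyCSEP loaders is assumed):
#
#   s_result = binary_spatial_test(gridded_forecast, observed_catalog, seed=0)
#   cl_result = binary_conditional_likelihood_test(gridded_forecast, observed_catalog, seed=0)
#   print(s_result.quantile, cl_result.quantile)
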
def matrix_binary_t_test(target_event_rates1, target_event_rates2, n_obs, n_f1, n_f2, catalog, alpha=0.05):
    """
    Computes the binary T-test statistic by comparing two target event rate distributions.

    We compare the forecast from Model 1 with the forecast from Model 2. The Information Gain per Active Bin (IGPA) is computed,
    which is then employed to compute the T statistic. The confidence interval of the information gain can be computed using
    t_critical. For a complete explanation see Rhoades, D. A., et al., (2011). Efficient testing of earthquake forecasting models.
    Acta Geophysica, 59(4), 728-747. doi:10.2478/s11600-011-0013-5, and Bayona J.A. et al., (2022). Prospective evaluation of
    multiplicative hybrid earthquake forecasting models in California. doi: 10.1093/gji/ggac018.

    Args:
        target_event_rates1 (numpy.ndarray): nd-array storing target event rates
        target_event_rates2 (numpy.ndarray): nd-array storing target event rates
        n_obs (float, int, numpy.ndarray): number of observed earthquakes; should be a whole number and >= zero.
        n_f1 (float): total number of earthquakes forecasted by Model 1
        n_f2 (float): total number of earthquakes forecasted by Model 2
        catalog: csep.core.catalogs.Catalog
        alpha (float): tolerance level for the type-i error rate of the statistical test

    Returns:
        out (dict): relevant statistics from the t-test
    """
    # some pre-calculations, because they are used repeatedly
    N_p = n_obs
    N = len(numpy.unique(numpy.nonzero(catalog.spatial_magnitude_counts().ravel())))  # number of active bins
    N1 = n_f1
    N2 = n_f2
    X1 = numpy.log(target_event_rates1)  # log of every element of Forecast 1
    X2 = numpy.log(target_event_rates2)  # log of every element of Forecast 2

    # information gain, using Equation (17) of Rhoades et al. 2011
    information_gain = (numpy.sum(X1 - X2) - (N1 - N2)) / N

    # compute variance of (X1 - X2) using Equation (18) of Rhoades et al. 2011
    first_term = (numpy.sum(numpy.power((X1 - X2), 2))) / (N - 1)
    second_term = numpy.power(numpy.sum(X1 - X2), 2) / (numpy.power(N, 2) - N)
    forecast_variance = first_term - second_term

    forecast_std = numpy.sqrt(forecast_variance)
    t_statistic = information_gain / (forecast_std / numpy.sqrt(N))

    # obtain the critical value of T from the t-distribution (two-tailed, so alpha is halved)
    df = N - 1
    t_critical = scipy.stats.t.ppf(1 - (alpha / 2), df)

    # compute the confidence interval of the information gain
    ig_lower = information_gain - (t_critical * forecast_std / numpy.sqrt(N))
    ig_upper = information_gain + (t_critical * forecast_std / numpy.sqrt(N))

    # If the T value is greater than t_critical, both the lower and upper confidence interval limits will be greater
    # than zero, which means that Forecasting Model 1 is better than Forecasting Model 2.
    return {'t_statistic': t_statistic,
            't_critical': t_critical,
            'information_gain': information_gain,
            'ig_lower': ig_lower,
            'ig_upper': ig_upper}

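# For reference, the statistic above follows Rhoades et al. (2011), Eqs. (17)-(18):
#
#   IGPA = ( sum_i (X1_i - X2_i) - (N1 - N2) ) / N,  with X_i = log(rate in active bin i)
#   s^2  = sum_i (X1_i - X2_i)^2 / (N - 1)  -  ( sum_i (X1_i - X2_i) )^2 / (N^2 - N)
#   T    = IGPA / ( s / sqrt(N) ),  compared against t_{1 - alpha/2, N - 1}
#
# where N is the number of active (nonzero-observation) bins.
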
def binary_paired_t_test(forecast, benchmark_forecast, observed_catalog, alpha=0.05, scale=False):
    """
    Computes the binary t-test for gridded earthquake forecasts.

    This score is positively oriented, meaning that positive values of the information gain indicate that the
    forecast is performing better than the benchmark forecast.

    Args:
        forecast (csep.core.forecasts.GriddedForecast): nd-array storing gridded rates; axis=-1 should be the magnitude column
        benchmark_forecast (csep.core.forecasts.GriddedForecast): nd-array storing gridded rates; axis=-1 should be the magnitude
            column
        observed_catalog (csep.core.catalogs.AbstractBaseCatalog): number of observed earthquakes; should be a whole number and >= zero.
        alpha (float): tolerance level for the type-i error rate of the statistical test
        scale (bool): if true, scale forecasted rates down to a single day

    Returns:
        evaluation_result: csep.core.evaluations.EvaluationResult
    """

    # needs some pre-processing to put the forecasts in the context that is required for the t-test. this is different
    # for cumulative forecasts (eg, multiple time-horizons) and static file-based forecasts. only the total rates
    # n_fore1 and n_fore2 from these calls are used below.
    target_event_rate_forecast1p, n_fore1 = forecast.target_event_rates(observed_catalog, scale=scale)
    target_event_rate_forecast2p, n_fore2 = benchmark_forecast.target_event_rates(observed_catalog, scale=scale)

    target_event_rate_forecast1 = forecast.data.ravel()[numpy.unique(numpy.nonzero(observed_catalog.spatial_magnitude_counts().ravel()))]
    target_event_rate_forecast2 = benchmark_forecast.data.ravel()[numpy.unique(numpy.nonzero(observed_catalog.spatial_magnitude_counts().ravel()))]

    # call the primitive version operating on ndarrays
    out = matrix_binary_t_test(target_event_rate_forecast1, target_event_rate_forecast2, observed_catalog.event_count, n_fore1, n_fore2,
                               observed_catalog,
                               alpha=alpha)

    # storing this for later
    result = EvaluationResult()
    result.name = 'binary paired T-Test'
    result.test_distribution = (out['ig_lower'], out['ig_upper'])
    result.observed_statistic = out['information_gain']
    result.quantile = (out['t_statistic'], out['t_critical'])
    result.sim_name = (forecast.name, benchmark_forecast.name)
    result.obs_name = observed_catalog.name
    result.status = 'normal'
    result.min_mw = numpy.min(forecast.magnitudes)
    return result
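
# Illustrative comparison of two models (object names are assumed pyCSEP usage):
#
#   t_result = binary_paired_t_test(model_a_forecast, model_b_forecast, observed_catalog)
#   ig_lower, ig_upper = t_result.test_distribution
#   if ig_lower > 0:
#       # the whole confidence interval lies above zero: Model A outperforms Model B
#       print(f'{t_result.sim_name[0]} is significantly more informative.')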
