Save statmap significance fits information (gwastro#4951)
* Report significance fit info in sngls_findtrigs

* Report significance calculation info in statmap jobs

* Fix typo, fix test

* TD comments, some tidying up

* neaten comments

* comment fix

---------

Co-authored-by: Thomas Dent <thomas.dent@usc.es>
GarethCabournDavies and tdent authored Dec 11, 2024
1 parent d57077f commit fb838c6
Showing 8 changed files with 72 additions and 28 deletions.
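
In practice the new information lands as HDF5 attributes on the `foreground` group(s) of the statmap output files, so downstream tools can recover how each significance was estimated. A minimal read-back sketch (the file name is hypothetical; the attribute names `alpha`, `sig_alpha`, `n_above`, `rate_above` and their `_exc` variants come from the changes below, and are only present when the fit-based method was used):

    import h5py

    # Hypothetical statmap output produced after this change
    with h5py.File('H1L1-STATMAP.hdf', 'r') as f:
        fg = f['foreground']
        # Fit information for the inclusive and exclusive background estimates;
        # no such attributes means the simple counting method was used
        fit_info = {k: v for k, v in fg.attrs.items()
                    if k in ('alpha', 'sig_alpha', 'n_above', 'rate_above')}
        fit_info_exc = {k: v for k, v in fg.attrs.items() if k.endswith('_exc')}
        print(fit_info, fit_info_exc)
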
12 changes: 6 additions & 6 deletions bin/all_sky_search/pycbc_add_statmap
@@ -308,14 +308,14 @@ if injection_style:
for bg_fname in args.background_files:
bg_f = pycbc.io.HFile(bg_fname, 'r')
ifo_combo_key = bg_f.attrs['ifos'].replace(' ','')
_, far[ifo_combo_key] = significance.get_far(
_, far[ifo_combo_key], _ = significance.get_far(
bg_f['background/stat'][:],
f['foreground/stat'][:],
bg_f['background/decimation_factor'][:],
bg_f.attrs['background_time'],
**significance_dict[ifo_combo_key])

_, far_exc[ifo_combo_key] = \
_, far_exc[ifo_combo_key], _ = \
significance.get_far(
bg_f['background_exc/stat'][:],
f['foreground/stat'][:],
@@ -328,15 +328,15 @@ else:
# background included
for f_in in files:
ifo_combo_key = get_ifo_string(f_in).replace(' ','')
_, far[ifo_combo_key] = \
_, far[ifo_combo_key], _ = \
significance.get_far(
f_in['background/stat'][:],
f['foreground/stat'][:],
f_in['background/decimation_factor'][:],
f_in.attrs['background_time'],
**significance_dict[ifo_combo_key])

_, far_exc[ifo_combo_key] = \
_, far_exc[ifo_combo_key], _ = \
significance.get_far(
f_in['background_exc/stat'][:],
f['foreground/stat'][:],
@@ -607,7 +607,7 @@ while True:
fg_time_ct[key] -= args.cluster_window
bg_t_y = conv.sec_to_year(bg_time_ct[key])
fg_t_y = conv.sec_to_year(fg_time_ct[key])
bg_far, fg_far = significance.get_far(
bg_far, fg_far, _ = significance.get_far(
sep_bg_data[key].data['stat'],
sep_fg_data[key].data['stat'],
sep_bg_data[key].data['decimation_factor'],
@@ -631,7 +631,7 @@ while True:

logging.info("Recalculating combined IFARs")
for key in all_ifo_combos:
_, far[key] = significance.get_far(
_, far[key], _ = significance.get_far(
sep_bg_data[key].data['stat'],
combined_fg_data.data['stat'],
sep_bg_data[key].data['decimation_factor'],
14 changes: 10 additions & 4 deletions bin/all_sky_search/pycbc_coinc_statmap
@@ -239,7 +239,7 @@ fore_stat = all_trigs.stat[fore_locs]

# Cumulative array of inclusive background triggers and the number of
# inclusive background triggers louder than each foreground trigger
bg_far, fg_far = significance.get_far(
bg_far, fg_far, sig_info = significance.get_far(
back_stat,
fore_stat,
all_trigs.decimation_factor[back_locs],
@@ -248,7 +248,7 @@ bg_far, fg_far = significance.get_far(

# Cumulative array of exclusive background triggers and the number
# of exclusive background triggers louder than each foreground trigger
bg_far_exc, fg_far_exc = significance.get_far(
bg_far_exc, fg_far_exc, exc_sig_info = significance.get_far(
exc_zero_trigs.stat,
fore_stat,
exc_zero_trigs.decimation_factor,
@@ -286,10 +286,14 @@ if fore_locs.sum() > 0:
fap = 1 - numpy.exp(- coinc_time / ifar)
f['foreground/ifar'] = conv.sec_to_year(ifar)
f['foreground/fap'] = fap
for key, value in sig_info.items():
f['foreground'].attrs[key] = value
ifar_exc = 1. / fg_far_exc
fap_exc = 1 - numpy.exp(- coinc_time_exc / ifar_exc)
f['foreground/ifar_exc'] = conv.sec_to_year(ifar_exc)
f['foreground/fap_exc'] = fap_exc
for key, value in exc_sig_info.items():
f['foreground'].attrs[key + '_exc'] = value
else:
f['foreground/ifar'] = ifar = numpy.array([])
f['foreground/fap'] = numpy.array([])
@@ -427,7 +431,7 @@ while numpy.any(ifar_foreground >= background_time):
logging.info("Calculating FAN from background statistic values")
back_stat = all_trigs.stat[back_locs]
fore_stat = all_trigs.stat[fore_locs]
bg_far, fg_far = significance.get_far(
bg_far, fg_far, sig_info = significance.get_far(
back_stat,
fore_stat,
all_trigs.decimation_factor[back_locs],
@@ -454,7 +458,7 @@ while numpy.any(ifar_foreground >= background_time):
# Exclusive background doesn't change when removing foreground triggers.
# So we don't have to take background ifar, just repopulate ifar_foreground
else :
_, fg_far_exc = significance.get_far(
_, fg_far_exc, _ = significance.get_far(
exc_zero_trigs.stat,
fore_stat,
exc_zero_trigs.decimation_factor,
@@ -481,6 +485,8 @@ while numpy.any(ifar_foreground >= background_time):
fap = 1 - numpy.exp(- coinc_time / ifar)
f['foreground_h%s/ifar' % h_iterations] = conv.sec_to_year(ifar)
f['foreground_h%s/fap' % h_iterations] = fap
for key, value in sig_info.items():
        f['foreground_h%s' % h_iterations].attrs[key] = value

# Update ifar and fap for other foreground triggers
for i in range(len(ifar)):
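
For orientation, the FAR-to-IFAR-to-FAP bookkeeping surrounding the new attributes is unchanged; a toy illustration of the conversion used in the lines above (the numbers are made up):

    import numpy

    fg_far = 1.5e-8            # foreground false-alarm rate in Hz from get_far
    coinc_time = 2.0e6         # coincident live time in seconds
    ifar = 1. / fg_far         # inverse FAR in seconds
    fap = 1 - numpy.exp(-coinc_time / ifar)   # chance of >=1 louder background
                                              # event in the analysed time
    print(ifar / (86400. * 365.25), fap)      # IFAR in years, and the FAP
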
4 changes: 3 additions & 1 deletion bin/all_sky_search/pycbc_coinc_statmap_inj
@@ -88,7 +88,7 @@ f.attrs['foreground_time'] = coinc_time

if len(zdata) > 0:

_, fg_far_exc = significance.get_far(
_, fg_far_exc, exc_sig_info = significance.get_far(
back_stat,
zdata.stat,
dec_fac,
@@ -105,6 +105,8 @@ if len(zdata) > 0:
fap_exc = 1 - numpy.exp(- coinc_time / ifar_exc)
f['foreground/ifar_exc'] = conv.sec_to_year(ifar_exc)
f['foreground/fap_exc'] = fap_exc
for key, value in exc_sig_info.items():
f['foreground'].attrs[key + '_exc'] = value

else:
f['foreground/ifar_exc'] = numpy.array([])
4 changes: 3 additions & 1 deletion bin/all_sky_search/pycbc_exclude_zerolag
@@ -91,7 +91,7 @@ for k in filtered_trigs.data:
f_out['background_exc/%s' % k] = filtered_trigs.data[k]

logging.info('Recalculating IFARs')
bg_far, fg_far = significance.get_far(
bg_far, fg_far, sig_info = significance.get_far(
filtered_trigs.data['stat'],
f_in['foreground/stat'][:],
filtered_trigs.data['decimation_factor'],
@@ -107,6 +107,8 @@ bg_ifar_exc = 1. / bg_far
logging.info('Writing updated ifars to file')
f_out['foreground/ifar_exc'][:] = conv.sec_to_year(fg_ifar_exc)
f_out['background_exc/ifar'][:] = conv.sec_to_year(bg_ifar_exc)
for key, value in sig_info.items():
f_out['foreground'].attrs[key + '_exc'] = value

fg_time_exc = conv.sec_to_year(f_in.attrs['foreground_time_exc'])
f_out['foreground/fap_exc'][:] = 1 - np.exp(-fg_time_exc / fg_ifar_exc)
25 changes: 20 additions & 5 deletions bin/all_sky_search/pycbc_sngls_statmap
@@ -107,6 +107,7 @@ assert ifo + '/time' in all_trigs.data
logging.info("We have %s triggers" % len(all_trigs.stat))
logging.info("Clustering triggers")
all_trigs = all_trigs.cluster(args.cluster_window)
logging.info("%s triggers remain" % len(all_trigs.stat))

fg_time = float(all_trigs.attrs['foreground_time'])

@@ -137,12 +138,13 @@ significance_dict = significance.digest_significance_options([ifo], args)

# Cumulative array of inclusive background triggers and the number of
# inclusive background triggers louder than each foreground trigger
bg_far, fg_far = significance.get_far(
bg_far, fg_far, sig_info = significance.get_far(
back_stat,
fore_stat,
bkg_dec_facs,
fg_time,
**significance_dict[ifo])
**significance_dict[ifo]
)

fg_far = significance.apply_far_limit(
fg_far,
@@ -190,7 +192,7 @@ back_exc_locs = back_exc_locs[to_keep]

# Cumulative array of exclusive background triggers and the number
# of exclusive background triggers louder than each foreground trigger
bg_far_exc, fg_far_exc = significance.get_far(
bg_far_exc, fg_far_exc, exc_sig_info = significance.get_far(
back_stat_exc,
fore_stat,
bkg_exc_dec_facs,
@@ -229,6 +231,10 @@ f['foreground/fap'] = fap
fap_exc = 1 - numpy.exp(- fg_time_exc / fg_ifar_exc)
f['foreground/ifar_exc'] = conv.sec_to_year(fg_ifar_exc)
f['foreground/fap_exc'] = fap_exc
for key, value in sig_info.items():
f['foreground'].attrs[key] = value
for key, value in exc_sig_info.items():
f['foreground'].attrs[f'{key}_exc'] = value

if 'name' in all_trigs.attrs:
f.attrs['name'] = all_trigs.attrs['name']
@@ -289,6 +295,10 @@ while numpy.any(ifar_louder > hier_ifar_thresh_s):
f['foreground_h%s/ifar' % h_iterations] = conv.sec_to_year(fg_ifar)
f['foreground_h%s/ifar_exc' % h_iterations] = conv.sec_to_year(fg_ifar_exc)
f['foreground_h%s/fap' % h_iterations] = fap
for key, value in sig_info.items():
f['foreground_h%s' % h_iterations].attrs[key] = value
for key, value in exc_sig_info.items():
f['foreground_h%s' % h_iterations].attrs[key + "_exc"] = value
for k in all_trigs.data:
f['foreground_h%s/' % h_iterations + k] = all_trigs.data[k]
# Add the iteration number of hierarchical removals done.
@@ -342,7 +352,7 @@ while numpy.any(ifar_louder > hier_ifar_thresh_s):
back_stat = fore_stat = all_trigs.stat
bkg_dec_facs = all_trigs.decimation_factor

bg_far, fg_far = significance.get_far(
bg_far, fg_far, sig_info = significance.get_far(
back_stat,
fore_stat,
bkg_dec_facs,
@@ -368,11 +378,12 @@ while numpy.any(ifar_louder > hier_ifar_thresh_s):
# triggers are being removed via inclusive or exclusive background.
if is_bkg_inc:
ifar_louder = fg_ifar
exc_sig_info = {}

# Exclusive background doesn't change when removing foreground triggers.
# So we don't have to take bg_far_exc, just repopulate fg_ifar_exc
else:
_, fg_far_exc = significance.get_far(
_, fg_far_exc, exc_sig_info = significance.get_far(
back_stat_exc,
fore_stat,
bkg_exc_dec_facs,
@@ -400,6 +411,10 @@ while numpy.any(ifar_louder > hier_ifar_thresh_s):
# Write ranking statistic to file just for downstream plotting code
f['foreground_h%s/stat' % h_iterations] = fore_stat

for key, value in sig_info.items():
f['foreground_h%s' % h_iterations].attrs[key] = value
for key, value in exc_sig_info.items():
f['foreground_h%s' % h_iterations].attrs[key + "_exc"] = value
fap = 1 - numpy.exp(- fg_time / fg_ifar)
f['foreground_h%s/ifar' % h_iterations] = conv.sec_to_year(fg_ifar)
f['foreground_h%s/fap' % h_iterations] = fap
5 changes: 4 additions & 1 deletion bin/all_sky_search/pycbc_sngls_statmap_inj
@@ -108,7 +108,7 @@ significance_dict = significance.digest_significance_options([ifo], args)

# Cumulative array of exclusive background triggers and the number
# of exclusive background triggers louder than each foreground trigger
bg_far_exc, fg_far_exc = significance.get_far(
bg_far_exc, fg_far_exc, sig_info = significance.get_far(
back_stat_exc,
fore_stat,
bkg_exc_dec_facs,
@@ -135,6 +135,9 @@ fap_exc = 1 - numpy.exp(- fg_time_exc / fg_ifar_exc)
f['foreground/ifar_exc'] = conv.sec_to_year(fg_ifar_exc)
f['foreground/fap_exc'] = fap_exc

for key, value in sig_info.items():
f['foreground'].attrs[key + '_exc'] = value

if 'name' in all_trigs.attrs:
f.attrs['name'] = all_trigs.attrs['name']

31 changes: 22 additions & 9 deletions pycbc/events/significance.py
@@ -55,6 +55,8 @@ def count_n_louder(bstat, fstat, dec,
The cumulative array of background triggers.
fore_n_louder: numpy.ndarray
The number of background triggers above each foreground trigger
{} : (empty) dictionary
Ensure we return the same tuple of objects as n_louder_from_fit()
"""
sort = bstat.argsort()
bstat = copy.deepcopy(bstat)[sort]
@@ -84,7 +86,9 @@

unsort = sort.argsort()
back_cum_num = n_louder[unsort]
return back_cum_num, fore_n_louder

# Empty dictionary to match the return from n_louder_from_fit
return back_cum_num, fore_n_louder, {}


def n_louder_from_fit(back_stat, fore_stat, dec_facs,
@@ -117,12 +121,17 @@
fg_n_louder: numpy.ndarray
The estimated number of background events louder than each
foreground event
sig_info : a dictionary
Information regarding the significance fit
"""

# Calculate the fitting factor of the ranking statistic distribution
alpha, _ = trstats.fit_above_thresh(fit_function, back_stat,
thresh=fit_threshold,
weights=dec_facs)
alpha, sig_alpha = trstats.fit_above_thresh(
fit_function,
back_stat,
thresh=fit_threshold,
weights=dec_facs
)

# Count background events above threshold as the cum_fit is
# normalised to 1
@@ -153,7 +162,7 @@ def n_louder_from_fit(back_stat, fore_stat, dec_facs,

# Count the number of below-threshold background events louder than the
# bg and foreground
bg_n_louder[bg_below], fg_n_louder[fg_below] = count_n_louder(
bg_n_louder[bg_below], fg_n_louder[fg_below], _ = count_n_louder(
back_stat[bg_below],
fore_stat[fg_below],
dec_facs[bg_below]
@@ -165,7 +174,8 @@ def n_louder_from_fit(back_stat, fore_stat, dec_facs,
bg_n_louder[bg_below] += n_above
fg_n_louder[fg_below] += n_above

return bg_n_louder, fg_n_louder
sig_info = {'alpha': alpha, 'sig_alpha': sig_alpha, 'n_above': n_above}
return bg_n_louder, fg_n_louder, sig_info


_significance_meth_dict = {
@@ -221,7 +231,7 @@ def get_far(back_stat, fore_stat, dec_facs,
a FAR
"""
bg_n_louder, fg_n_louder = get_n_louder(
bg_n_louder, fg_n_louder, significance_info = get_n_louder(
back_stat,
fore_stat,
dec_facs,
@@ -239,7 +249,10 @@ def get_far(back_stat, fore_stat, dec_facs,
bg_far = bg_n_louder / background_time
fg_far = fg_n_louder / background_time

return bg_far, fg_far
if "n_above" in significance_info:
significance_info["rate_above"] = significance_info["n_above"] / background_time

return bg_far, fg_far, significance_info


def insert_significance_option_group(parser):
@@ -288,6 +301,7 @@ def positive_float(inp):
logger.warning("Value provided to positive_float is less than zero, "
"this is not allowed")
raise ValueError

return fl_in


@@ -366,7 +380,6 @@ def ifar_opt_to_far_limit(ifar_str):
"""
ifar_float = positive_float(ifar_str)

far_hz = 0. if (ifar_float == 0.) else conv.sec_to_year(1. / ifar_float)

return far_hz
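
To summarise the interface change, `significance.get_far` (and `get_n_louder`) now return a third value describing the fit. A minimal calling sketch with toy inputs; the keyword names `fit_function` and `fit_threshold` mirror the `n_louder_from_fit` signature above, while `method='trigger_fit'` is an assumption about how the method dictionary selects that function:

    import numpy as np
    from pycbc.events import significance

    # Toy stand-ins for clustered trigger data
    back_stat = np.random.uniform(6., 9., 2000)   # background ranking statistics
    fore_stat = np.array([8.2, 10.5])             # foreground candidates
    dec_facs = np.ones_like(back_stat)            # decimation factors
    bg_time = 3.0e7                               # background live time in seconds

    bg_far, fg_far, sig_info = significance.get_far(
        back_stat,
        fore_stat,
        dec_facs,
        bg_time,
        method='trigger_fit',        # assumed key for the fit-based estimator
        fit_function='exponential',
        fit_threshold=7.,
    )

    # sig_info is an empty dict for the counting method; for the fit method it
    # holds 'alpha', 'sig_alpha', 'n_above' and 'rate_above', which the statmap
    # executables store as attributes of the 'foreground' group.
    print(fg_far, sig_info)
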
5 changes: 4 additions & 1 deletion test/test_significance_module.py
@@ -170,7 +170,7 @@ def setUp(self):
method_dict['fit_threshold'] = None if not function else 0

def meth_test(self, md=method_dict):
bg_n_louder, fg_n_louder = significance.get_n_louder(
bg_n_louder, fg_n_louder, sig_info = significance.get_n_louder(
self.test_bg_stat,
self.test_fg_stat,
self.dec_facs,
@@ -207,6 +207,9 @@ def meth_test(self, md=method_dict):
self.assertTrue(np.array_equal(fg_n_louder[fore_stat_sort],
fg_n_louder[fore_far_sort][::-1]))

# Tests on the significance info output dictionary
self.assertTrue(isinstance(sig_info, dict))

setattr(SignificanceMethodTest,
'test_%s_%s' % (method, function),
meth_test)
