Save statmap significance fits information (gwastro#4951)
* Report significance fit info in sngls_findtrigs

* Report significance calculation info in statmap jobs

* Fix typo, fix test

* TD comments, some tidying up

* neaten comments

* comment fix

---------

Co-authored-by: Thomas Dent <thomas.dent@usc.es>
GarethCabournDavies and tdent authored Dec 11, 2024
1 parent d57077f commit fb838c6
Showing 8 changed files with 72 additions and 28 deletions.
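
In practice the new information lands as HDF5 attributes on the `foreground` group(s) of the statmap output files, so downstream tools can recover how each significance was estimated. A minimal read-back sketch (the file name is hypothetical; the attribute names `alpha`, `sig_alpha`, `n_above`, `rate_above` and their `_exc` variants come from the changes below, and are only present when the fit-based method was used):

    import h5py

    # Hypothetical statmap output produced after this change
    with h5py.File('H1L1-STATMAP.hdf', 'r') as f:
        fg = f['foreground']
        # Fit information for the inclusive and exclusive background estimates;
        # no such attributes means the simple counting method was used
        fit_info = {k: v for k, v in fg.attrs.items()
                    if k in ('alpha', 'sig_alpha', 'n_above', 'rate_above')}
        fit_info_exc = {k: v for k, v in fg.attrs.items() if k.endswith('_exc')}
        print(fit_info, fit_info_exc)
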
12 changes: 6 additions & 6 deletions bin/all_sky_search/pycbc_add_statmap
@@ -308,14 +308,14 @@ if injection_style:
for bg_fname in args.background_files:
bg_f = pycbc.io.HFile(bg_fname, 'r')
ifo_combo_key = bg_f.attrs['ifos'].replace(' ','')
_, far[ifo_combo_key] = significance.get_far(
_, far[ifo_combo_key], _ = significance.get_far(
bg_f['background/stat'][:],
f['foreground/stat'][:],
bg_f['background/decimation_factor'][:],
bg_f.attrs['background_time'],
**significance_dict[ifo_combo_key])

_, far_exc[ifo_combo_key] = \
_, far_exc[ifo_combo_key], _ = \
significance.get_far(
bg_f['background_exc/stat'][:],
f['foreground/stat'][:],
@@ -328,15 +328,15 @@ else:
# background included
for f_in in files:
ifo_combo_key = get_ifo_string(f_in).replace(' ','')
_, far[ifo_combo_key] = \
_, far[ifo_combo_key], _ = \
significance.get_far(
f_in['background/stat'][:],
f['foreground/stat'][:],
f_in['background/decimation_factor'][:],
f_in.attrs['background_time'],
**significance_dict[ifo_combo_key])

_, far_exc[ifo_combo_key] = \
_, far_exc[ifo_combo_key], _ = \
significance.get_far(
f_in['background_exc/stat'][:],
f['foreground/stat'][:],
@@ -607,7 +607,7 @@ while True:
fg_time_ct[key] -= args.cluster_window
bg_t_y = conv.sec_to_year(bg_time_ct[key])
fg_t_y = conv.sec_to_year(fg_time_ct[key])
bg_far, fg_far = significance.get_far(
bg_far, fg_far, _ = significance.get_far(
sep_bg_data[key].data['stat'],
sep_fg_data[key].data['stat'],
sep_bg_data[key].data['decimation_factor'],
@@ -631,7 +631,7 @@ while True:

logging.info("Recalculating combined IFARs")
for key in all_ifo_combos:
_, far[key] = significance.get_far(
_, far[key], _ = significance.get_far(
sep_bg_data[key].data['stat'],
combined_fg_data.data['stat'],
sep_bg_data[key].data['decimation_factor'],
14 changes: 10 additions & 4 deletions bin/all_sky_search/pycbc_coinc_statmap
@@ -239,7 +239,7 @@ fore_stat = all_trigs.stat[fore_locs]

# Cumulative array of inclusive background triggers and the number of
# inclusive background triggers louder than each foreground trigger
bg_far, fg_far = significance.get_far(
bg_far, fg_far, sig_info = significance.get_far(
back_stat,
fore_stat,
all_trigs.decimation_factor[back_locs],
@@ -248,7 +248,7 @@ bg_far, fg_far = significance.get_far(

# Cumulative array of exclusive background triggers and the number
# of exclusive background triggers louder than each foreground trigger
bg_far_exc, fg_far_exc = significance.get_far(
bg_far_exc, fg_far_exc, exc_sig_info = significance.get_far(
exc_zero_trigs.stat,
fore_stat,
exc_zero_trigs.decimation_factor,
@@ -286,10 +286,14 @@ if fore_locs.sum() > 0:
fap = 1 - numpy.exp(- coinc_time / ifar)
f['foreground/ifar'] = conv.sec_to_year(ifar)
f['foreground/fap'] = fap
for key, value in sig_info.items():
f['foreground'].attrs[key] = value
ifar_exc = 1. / fg_far_exc
fap_exc = 1 - numpy.exp(- coinc_time_exc / ifar_exc)
f['foreground/ifar_exc'] = conv.sec_to_year(ifar_exc)
f['foreground/fap_exc'] = fap_exc
for key, value in exc_sig_info.items():
f['foreground'].attrs[key + '_exc'] = value
else:
f['foreground/ifar'] = ifar = numpy.array([])
f['foreground/fap'] = numpy.array([])
@@ -427,7 +431,7 @@ while numpy.any(ifar_foreground >= background_time):
logging.info("Calculating FAN from background statistic values")
back_stat = all_trigs.stat[back_locs]
fore_stat = all_trigs.stat[fore_locs]
bg_far, fg_far = significance.get_far(
bg_far, fg_far, sig_info = significance.get_far(
back_stat,
fore_stat,
all_trigs.decimation_factor[back_locs],
@@ -454,7 +458,7 @@ while numpy.any(ifar_foreground >= background_time):
# Exclusive background doesn't change when removing foreground triggers.
# So we don't have to take background ifar, just repopulate ifar_foreground
else :
_, fg_far_exc = significance.get_far(
_, fg_far_exc, _ = significance.get_far(
exc_zero_trigs.stat,
fore_stat,
exc_zero_trigs.decimation_factor,
@@ -481,6 +485,8 @@ while numpy.any(ifar_foreground >= background_time):
fap = 1 - numpy.exp(- coinc_time / ifar)
f['foreground_h%s/ifar' % h_iterations] = conv.sec_to_year(ifar)
f['foreground_h%s/fap' % h_iterations] = fap
for key, value in sig_info.items():
        f['foreground_h%s' % h_iterations].attrs[key] = value

# Update ifar and fap for other foreground triggers
for i in range(len(ifar)):
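
For orientation, the FAR-to-IFAR-to-FAP bookkeeping surrounding the new attributes is unchanged; a toy illustration of the conversion used in the lines above (the numbers are made up):

    import numpy

    fg_far = 1.5e-8            # foreground false-alarm rate in Hz from get_far
    coinc_time = 2.0e6         # coincident live time in seconds
    ifar = 1. / fg_far         # inverse FAR in seconds
    fap = 1 - numpy.exp(-coinc_time / ifar)   # chance of >=1 louder background
                                              # event in the analysed time
    print(ifar / (86400. * 365.25), fap)      # IFAR in years, and the FAP
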
4 changes: 3 additions & 1 deletion bin/all_sky_search/pycbc_coinc_statmap_inj
@@ -88,7 +88,7 @@ f.attrs['foreground_time'] = coinc_time

if len(zdata) > 0:

_, fg_far_exc = significance.get_far(
_, fg_far_exc, exc_sig_info = significance.get_far(
back_stat,
zdata.stat,
dec_fac,
@@ -105,6 +105,8 @@ if len(zdata) > 0:
fap_exc = 1 - numpy.exp(- coinc_time / ifar_exc)
f['foreground/ifar_exc'] = conv.sec_to_year(ifar_exc)
f['foreground/fap_exc'] = fap_exc
for key, value in exc_sig_info.items():
f['foreground'].attrs[key + '_exc'] = value

else:
f['foreground/ifar_exc'] = numpy.array([])
4 changes: 3 additions & 1 deletion bin/all_sky_search/pycbc_exclude_zerolag
@@ -91,7 +91,7 @@ for k in filtered_trigs.data:
f_out['background_exc/%s' % k] = filtered_trigs.data[k]

logging.info('Recalculating IFARs')
bg_far, fg_far = significance.get_far(
bg_far, fg_far, sig_info = significance.get_far(
filtered_trigs.data['stat'],
f_in['foreground/stat'][:],
filtered_trigs.data['decimation_factor'],
@@ -107,6 +107,8 @@ bg_ifar_exc = 1. / bg_far
logging.info('Writing updated ifars to file')
f_out['foreground/ifar_exc'][:] = conv.sec_to_year(fg_ifar_exc)
f_out['background_exc/ifar'][:] = conv.sec_to_year(bg_ifar_exc)
for key, value in sig_info.items():
f_out['foreground'].attrs[key + '_exc'] = value

fg_time_exc = conv.sec_to_year(f_in.attrs['foreground_time_exc'])
f_out['foreground/fap_exc'][:] = 1 - np.exp(-fg_time_exc / fg_ifar_exc)
25 changes: 20 additions & 5 deletions bin/all_sky_search/pycbc_sngls_statmap
@@ -107,6 +107,7 @@ assert ifo + '/time' in all_trigs.data
logging.info("We have %s triggers" % len(all_trigs.stat))
logging.info("Clustering triggers")
all_trigs = all_trigs.cluster(args.cluster_window)
logging.info("%s triggers remain" % len(all_trigs.stat))

fg_time = float(all_trigs.attrs['foreground_time'])

@@ -137,12 +138,13 @@ significance_dict = significance.digest_significance_options([ifo], args)

# Cumulative array of inclusive background triggers and the number of
# inclusive background triggers louder than each foreground trigger
bg_far, fg_far = significance.get_far(
bg_far, fg_far, sig_info = significance.get_far(
back_stat,
fore_stat,
bkg_dec_facs,
fg_time,
**significance_dict[ifo])
**significance_dict[ifo]
)

fg_far = significance.apply_far_limit(
fg_far,
@@ -190,7 +192,7 @@ back_exc_locs = back_exc_locs[to_keep]

# Cumulative array of exclusive background triggers and the number
# of exclusive background triggers louder than each foreground trigger
bg_far_exc, fg_far_exc = significance.get_far(
bg_far_exc, fg_far_exc, exc_sig_info = significance.get_far(
back_stat_exc,
fore_stat,
bkg_exc_dec_facs,
@@ -229,6 +231,10 @@ f['foreground/fap'] = fap
fap_exc = 1 - numpy.exp(- fg_time_exc / fg_ifar_exc)
f['foreground/ifar_exc'] = conv.sec_to_year(fg_ifar_exc)
f['foreground/fap_exc'] = fap_exc
for key, value in sig_info.items():
f['foreground'].attrs[key] = value
for key, value in exc_sig_info.items():
f['foreground'].attrs[f'{key}_exc'] = value

if 'name' in all_trigs.attrs:
f.attrs['name'] = all_trigs.attrs['name']
@@ -289,6 +295,10 @@ while numpy.any(ifar_louder > hier_ifar_thresh_s):
f['foreground_h%s/ifar' % h_iterations] = conv.sec_to_year(fg_ifar)
f['foreground_h%s/ifar_exc' % h_iterations] = conv.sec_to_year(fg_ifar_exc)
f['foreground_h%s/fap' % h_iterations] = fap
for key, value in sig_info.items():
f['foreground_h%s' % h_iterations].attrs[key] = value
for key, value in exc_sig_info.items():
f['foreground_h%s' % h_iterations].attrs[key + "_exc"] = value
for k in all_trigs.data:
f['foreground_h%s/' % h_iterations + k] = all_trigs.data[k]
# Add the iteration number of hierarchical removals done.
@@ -342,7 +352,7 @@ while numpy.any(ifar_louder > hier_ifar_thresh_s):
back_stat = fore_stat = all_trigs.stat
bkg_dec_facs = all_trigs.decimation_factor

bg_far, fg_far = significance.get_far(
bg_far, fg_far, sig_info = significance.get_far(
back_stat,
fore_stat,
bkg_dec_facs,
@@ -368,11 +378,12 @@ while numpy.any(ifar_louder > hier_ifar_thresh_s):
# triggers are being removed via inclusive or exclusive background.
if is_bkg_inc:
ifar_louder = fg_ifar
exc_sig_info = {}

# Exclusive background doesn't change when removing foreground triggers.
# So we don't have to take bg_far_exc, just repopulate fg_ifar_exc
else:
_, fg_far_exc = significance.get_far(
_, fg_far_exc, exc_sig_info = significance.get_far(
back_stat_exc,
fore_stat,
bkg_exc_dec_facs,
@@ -400,6 +411,10 @@ while numpy.any(ifar_louder > hier_ifar_thresh_s):
# Write ranking statistic to file just for downstream plotting code
f['foreground_h%s/stat' % h_iterations] = fore_stat

for key, value in sig_info.items():
f['foreground_h%s' % h_iterations].attrs[key] = value
for key, value in exc_sig_info.items():
f['foreground_h%s' % h_iterations].attrs[key + "_exc"] = value
fap = 1 - numpy.exp(- fg_time / fg_ifar)
f['foreground_h%s/ifar' % h_iterations] = conv.sec_to_year(fg_ifar)
f['foreground_h%s/fap' % h_iterations] = fap
5 changes: 4 additions & 1 deletion bin/all_sky_search/pycbc_sngls_statmap_inj
@@ -108,7 +108,7 @@ significance_dict = significance.digest_significance_options([ifo], args)

# Cumulative array of exclusive background triggers and the number
# of exclusive background triggers louder than each foreground trigger
bg_far_exc, fg_far_exc = significance.get_far(
bg_far_exc, fg_far_exc, sig_info = significance.get_far(
back_stat_exc,
fore_stat,
bkg_exc_dec_facs,
@@ -135,6 +135,9 @@ fap_exc = 1 - numpy.exp(- fg_time_exc / fg_ifar_exc)
f['foreground/ifar_exc'] = conv.sec_to_year(fg_ifar_exc)
f['foreground/fap_exc'] = fap_exc

for key, value in sig_info.items():
f['foreground'].attrs[key + '_exc'] = value

if 'name' in all_trigs.attrs:
f.attrs['name'] = all_trigs.attrs['name']

31 changes: 22 additions & 9 deletions pycbc/events/significance.py
@@ -55,6 +55,8 @@ def count_n_louder(bstat, fstat, dec,
The cumulative array of background triggers.
fore_n_louder: numpy.ndarray
The number of background triggers above each foreground trigger
{} : (empty) dictionary
Ensure we return the same tuple of objects as n_louder_from_fit()
"""
sort = bstat.argsort()
bstat = copy.deepcopy(bstat)[sort]
@@ -84,7 +86,9 @@

unsort = sort.argsort()
back_cum_num = n_louder[unsort]
return back_cum_num, fore_n_louder

# Empty dictionary to match the return from n_louder_from_fit
return back_cum_num, fore_n_louder, {}


def n_louder_from_fit(back_stat, fore_stat, dec_facs,
@@ -117,12 +121,17 @@
fg_n_louder: numpy.ndarray
The estimated number of background events louder than each
foreground event
sig_info : a dictionary
Information regarding the significance fit
"""

# Calculate the fitting factor of the ranking statistic distribution
alpha, _ = trstats.fit_above_thresh(fit_function, back_stat,
thresh=fit_threshold,
weights=dec_facs)
alpha, sig_alpha = trstats.fit_above_thresh(
fit_function,
back_stat,
thresh=fit_threshold,
weights=dec_facs
)

# Count background events above threshold as the cum_fit is
# normalised to 1
@@ -153,7 +162,7 @@ def n_louder_from_fit(back_stat, fore_stat, dec_facs,

# Count the number of below-threshold background events louder than the
# bg and foreground
bg_n_louder[bg_below], fg_n_louder[fg_below] = count_n_louder(
bg_n_louder[bg_below], fg_n_louder[fg_below], _ = count_n_louder(
back_stat[bg_below],
fore_stat[fg_below],
dec_facs[bg_below]
@@ -165,7 +174,8 @@ def n_louder_from_fit(back_stat, fore_stat, dec_facs,
bg_n_louder[bg_below] += n_above
fg_n_louder[fg_below] += n_above

return bg_n_louder, fg_n_louder
sig_info = {'alpha': alpha, 'sig_alpha': sig_alpha, 'n_above': n_above}
return bg_n_louder, fg_n_louder, sig_info


_significance_meth_dict = {
@@ -221,7 +231,7 @@ def get_far(back_stat, fore_stat, dec_facs,
a FAR
"""
bg_n_louder, fg_n_louder = get_n_louder(
bg_n_louder, fg_n_louder, significance_info = get_n_louder(
back_stat,
fore_stat,
dec_facs,
@@ -239,7 +249,10 @@ def get_far(back_stat, fore_stat, dec_facs,
bg_far = bg_n_louder / background_time
fg_far = fg_n_louder / background_time

return bg_far, fg_far
if "n_above" in significance_info:
significance_info["rate_above"] = significance_info["n_above"] / background_time

return bg_far, fg_far, significance_info


def insert_significance_option_group(parser):
@@ -288,6 +301,7 @@ def positive_float(inp):
logger.warning("Value provided to positive_float is less than zero, "
"this is not allowed")
raise ValueError

return fl_in


@@ -366,7 +380,6 @@ def ifar_opt_to_far_limit(ifar_str):
"""
ifar_float = positive_float(ifar_str)

far_hz = 0. if (ifar_float == 0.) else conv.sec_to_year(1. / ifar_float)

return far_hz
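
To summarise the interface change, `significance.get_far` (and `get_n_louder`) now return a third value describing the fit. A minimal calling sketch with toy inputs; the keyword names `fit_function` and `fit_threshold` mirror the `n_louder_from_fit` signature above, while `method='trigger_fit'` is an assumption about how the method dictionary selects that function:

    import numpy as np
    from pycbc.events import significance

    # Toy stand-ins for clustered trigger data
    back_stat = np.random.uniform(6., 9., 2000)   # background ranking statistics
    fore_stat = np.array([8.2, 10.5])             # foreground candidates
    dec_facs = np.ones_like(back_stat)            # decimation factors
    bg_time = 3.0e7                               # background live time in seconds

    bg_far, fg_far, sig_info = significance.get_far(
        back_stat,
        fore_stat,
        dec_facs,
        bg_time,
        method='trigger_fit',        # assumed key for the fit-based estimator
        fit_function='exponential',
        fit_threshold=7.,
    )

    # sig_info is an empty dict for the counting method; for the fit method it
    # holds 'alpha', 'sig_alpha', 'n_above' and 'rate_above', which the statmap
    # executables store as attributes of the 'foreground' group.
    print(fg_far, sig_info)
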
5 changes: 4 additions & 1 deletion test/test_significance_module.py
@@ -170,7 +170,7 @@ def setUp(self):
method_dict['fit_threshold'] = None if not function else 0

def meth_test(self, md=method_dict):
bg_n_louder, fg_n_louder = significance.get_n_louder(
bg_n_louder, fg_n_louder, sig_info = significance.get_n_louder(
self.test_bg_stat,
self.test_fg_stat,
self.dec_facs,
@@ -207,6 +207,9 @@ def meth_test(self, md=method_dict):
self.assertTrue(np.array_equal(fg_n_louder[fore_stat_sort],
fg_n_louder[fore_far_sort][::-1]))

# Tests on the significance info output dictionary
self.assertTrue(isinstance(sig_info, dict))

setattr(SignificanceMethodTest,
'test_%s_%s' % (method, function),
meth_test)
