From 1c22455bd9bb035964e998a72f2ce2826c7b2209 Mon Sep 17 00:00:00 2001
From: Gareth S Cabourn Davies
Date: Thu, 23 Nov 2023 13:33:00 +0000
Subject: [PATCH] Use gating windows in sngl_minifollowup and allow 'triggers
 within vetoes' option (#4549)

* Use gating windows in sngl_minifollowup

- also allow 'triggers within vetoes' option

* Minor fixes

* add info if fewer triggers than requested

* Allow pycbc_sngl_minifollowup to use single ranking statistic

* Fix bug in rank_stat_single for quadsum/phasetd statistics

* mask may be boolean now

* fix broken pycbc_page_snglinfo due to bugfix

* page_snglinfo to use the ranking statistic used

* missed change
---
 bin/minifollowups/pycbc_page_snglinfo     |   7 +-
 bin/minifollowups/pycbc_sngl_minifollowup | 172 +++++++++++++++---
 .../pycbc_make_offline_search_workflow    |   5 +-
 pycbc/events/stat.py                      |  10 +-
 pycbc/io/hdf.py                           |   3 +-
 pycbc/workflow/minifollowups.py           |  12 +-
 6 files changed, 170 insertions(+), 39 deletions(-)

diff --git a/bin/minifollowups/pycbc_page_snglinfo b/bin/minifollowups/pycbc_page_snglinfo
index e6bc8dc31a4..eb248daecd1 100644
--- a/bin/minifollowups/pycbc_page_snglinfo
+++ b/bin/minifollowups/pycbc_page_snglinfo
@@ -138,8 +138,11 @@ else:

 if args.ranking_statistic in ["quadsum", "single_ranking_only"]:
     stat_name = sngl_stat_name
+    stat_name_long = sngl_stat_name
 else:
-    stat_name = '_with_'.join(ranking_statistic, sngl_ranking)
+    # Name would be too long - just call it ranking statistic
+    stat_name = 'Ranking Statistic'
+    stat_name_long = ' with '.join([args.ranking_statistic, args.sngl_ranking])

 headers.append(stat_name)
@@ -201,7 +204,7 @@ html = pycbc.results.dq.redirect_javascript + \
 if args.n_loudest:
     title = 'Parameters of single-detector event ranked %s' \
         % (args.n_loudest + 1)
-    caption = 'Parameters of the single-detector event ranked number %s by %s. The figures below show the mini-followup data for this event.' % (args.n_loudest + 1, stat_name)
+    caption = 'Parameters of the single-detector event ranked number %s by %s. The figures below show the mini-followup data for this event.' % (args.n_loudest + 1, stat_name_long)
 else:
     title = 'Parameters of single-detector event'
     caption = 'Parameters of the single-detector event. The figures below show the mini-followup data for this event.'
diff --git a/bin/minifollowups/pycbc_sngl_minifollowup b/bin/minifollowups/pycbc_sngl_minifollowup
index bd3ae4bed1a..01fbc743e83 100644
--- a/bin/minifollowups/pycbc_sngl_minifollowup
+++ b/bin/minifollowups/pycbc_sngl_minifollowup
@@ -14,29 +14,24 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-""" Followup foreground events
+"""
+Follow up single-detector triggers which do not contribute to foreground events
 """
 import os, argparse, logging
 import numpy
 from ligo.lw import table
 from ligo.lw import utils as ligolw_utils
 from pycbc.results import layout
+from pycbc.types.optparse import MultiDetOptionAction
 from pycbc.events import select_segments_by_definer
 import pycbc.workflow.minifollowups as mini
 import pycbc.version
 import pycbc.workflow as wf
 import pycbc.events
 from pycbc.workflow.core import resolve_url_to_file
-from pycbc.events import stat
+from pycbc.events import stat, veto, coinc
 from pycbc.io import hdf

-def add_wiki_row(outfile, cols):
-    """
-    Adds a wiki-formatted row to an output file from a list or a numpy array.
- """ - with open(outfile, 'a') as f: - f.write('||%s||\n' % '||'.join(map(str,cols))) - parser = argparse.ArgumentParser(description=__doc__[1:]) parser.add_argument('--version', action='version', version=pycbc.version.git_verbose_msg) parser.add_argument('--bank-file', @@ -44,11 +39,23 @@ parser.add_argument('--bank-file', parser.add_argument('--single-detector-file', help="HDF format merged single detector trigger files") parser.add_argument('--instrument', help="Name of interferometer e.g. H1") +parser.add_argument('--foreground-censor-file', + help="The censor file to be used if vetoing triggers " + "in the foreground of the search (optional).") +parser.add_argument('--foreground-segment-name', + help="If using foreground censor file must also provide " + "the name of the segment to use as a veto.") parser.add_argument('--veto-file', - help="The veto file to be used if vetoing triggers (optional).") + help="The veto file to be used if vetoing triggers " + "(optional).") parser.add_argument('--veto-segment-name', - help="If using veto file must also provide the name of the segment to use " - "as a veto.") + help="If using veto file must also provide the name of " + "the segment to use as a veto.") +parser.add_argument("--gating-veto-windows", nargs='+', + action=MultiDetOptionAction, + help="Seconds to be vetoed before and after the central time " + "of each gate. Given as detector-values pairs, e.g. " + "H1:-1,2.5 L1:-1,2.5 V1:0,0") parser.add_argument('--inspiral-segments', help="xml segment file containing the inspiral analysis " "times") @@ -60,17 +67,22 @@ parser.add_argument('--inspiral-data-analyzed-name', "analyzed by each analysis job.") parser.add_argument('--min-snr', type=float, default=6.5, help="Minimum SNR to consider for loudest triggers") -parser.add_argument('--non-coinc-time-only', default=False, - action='store_true', +parser.add_argument('--non-coinc-time-only', action='store_true', help="If given remove (veto) single-detector triggers " "that occur during a time when at least one other " "instrument is taking science data.") +parser.add_argument('--vetoed-time-only', action='store_true', + help="If given, only report on single-detector triggers " + "that occur during vetoed times.") parser.add_argument('--minimum-duration', default=None, type=float, help="If given only consider single-detector triggers " "with template duration larger than this.") parser.add_argument('--maximum-duration', default=None, type=float, help="If given only consider single-detector triggers " "with template duration smaller than this.") +parser.add_argument('--cluster-window', type=float, default=10, + help="Window (seconds) over which to cluster triggers " + "when finding the loudest-ranked. 
Default=10") wf.add_workflow_command_line_group(parser) wf.add_workflow_settings_cli(parser, include_subdax_opts=True) stat.insert_statistic_option_group(parser, @@ -95,6 +107,12 @@ sngl_file = resolve_url_to_file( attrs={'ifos': args.instrument} ) +# Flatten the statistic_files option: +statfiles = [] +for f in sum(args.statistic_files, []): + statfiles.append(resolve_url_to_file(os.path.abspath(f))) +statfiles = wf.FileList(statfiles) if statfiles is not [] else None + if args.veto_file is not None: veto_file = resolve_url_to_file( os.path.abspath(args.veto_file), @@ -102,6 +120,7 @@ if args.veto_file is not None: ) else: veto_file = None + insp_segs = resolve_url_to_file(os.path.abspath(args.inspiral_segments)) insp_data_seglists = select_segments_by_definer\ (args.inspiral_segments, segment_name=args.inspiral_data_read_name, @@ -112,20 +131,89 @@ num_events = int(workflow.cp.get_opt_tags('workflow-sngl_minifollowups', 'num-sngl-events', '')) # This helps speed up the processing to ignore a large fraction of triggers -snr_mask = None +mask = None +f = hdf.HFile(args.single_detector_file, 'r') +n_triggers = f['{}/snr'.format(args.instrument)].size +logging.info("%i triggers in file", n_triggers) if args.min_snr: logging.info('Calculating Prefilter') - f = hdf.HFile(args.single_detector_file, 'r') idx, _ = f.select(lambda snr: snr > args.min_snr, '{}/snr'.format(args.instrument), return_indices=True) - snr_mask = numpy.zeros(len(f['{}/snr'.format(args.instrument)]), - dtype=bool) - snr_mask[idx] = True + mask = numpy.zeros(n_triggers, dtype=bool) + mask[idx] = True + if len(idx) < num_events: + logging.info("Fewer triggers exist after the --min-snr cut (%d) " + "than requested for the minifollowup (%d)", + len(idx), num_events) -trigs = hdf.SingleDetTriggers(args.single_detector_file, args.bank_file, - args.veto_file, args.veto_segment_name, - None, args.instrument, premask=snr_mask) +trigs = hdf.SingleDetTriggers( + args.single_detector_file, + args.bank_file, + args.foreground_censor_file, + args.foreground_segment_name, + None, + args.instrument, + premask=mask +) + +# Include gating vetoes +if args.gating_veto_windows: + logging.info("Getting gating vetoes") + gating_veto = args.gating_veto_windows[args.instrument].split(',') + gveto_before = float(gating_veto[0]) + gveto_after = float(gating_veto[1]) + if gveto_before > 0 or gveto_after < 0: + raise ValueError("Gating veto window values must be negative before " + "gates and positive after gates.") + if not (gveto_before == 0 and gveto_after == 0): + gate_group = f[args.instrument + '/gating/'] + autogate_times = numpy.unique(gate_group['auto/time'][:]) + if 'file' in gate_group: + detgate_times = gate_group['file/time'][:] + else: + detgate_times = [] + gate_times = numpy.concatenate((autogate_times, detgate_times)) + gveto_idx = veto.indices_within_times( + trigs.end_time, + gate_times + gveto_before, + gate_times + gveto_after + ) + logging.info('%i triggers in gating vetoes', gveto_idx.size) +else: + gveto_idx = numpy.array([], dtype=numpy.uint64) + +if args.veto_file: + logging.info('Getting file vetoes') + # veto_mask is an array of indices into the trigger arrays + # giving the surviving triggers + veto_file_idx, _ = events.veto.indices_within_segments( + trigs.end_time, + [args.veto_file], + ifo=args.instrument, + segment_name=args.veto_segment_name + ) + + logging.info('%i triggers in file-vetoed segments', + veto_file_idx.size) +else: + veto_file_idx = numpy.array([], dtype=numpy.uint64) + +# Work out indices we are going 
to keep / remove +vetoed_idx = numpy.unique(numpy.concatenate((veto_file_idx, gveto_idx))) +# Needs to be in ascending order +vetoed_idx = numpy.sort(vetoed_idx).astype(numpy.uint64) + +if args.vetoed_time_only and vetoed_idx.size > 0: + logging.info("Applying mask to keep only triggers within vetoed time") + trigs.apply_mask(vetoed_idx) +elif vetoed_idx.size > 0: + logging.info("Applying mask to keep only triggers outwith vetoed time") + veto_mask = numpy.ones(trigs.end_time.size, dtype=bool) + veto_mask[vetoed_idx] = False + trigs.apply_mask(veto_mask) +elif args.vetoed_time_only and vetoed_idx.size == 0: + logging.warning("No triggers exist inside vetoed times") if args.non_coinc_time_only: from pycbc.io.ligolw import LIGOLWContentHandler as h @@ -158,29 +246,56 @@ if args.maximum_duration is not None: trigs.apply_mask(lgc_mask) logging.info('remaining triggers: %s', trigs.mask.sum()) -logging.info('finding loudest clustered events') +logging.info('Finding loudest clustered events') rank_method = stat.get_statistic_from_opts(args, [args.instrument]) -trigs.mask_to_n_loudest_clustered_events(rank_method, n_loudest=num_events) -if len(trigs.stat) < num_events: - num_events = len(trigs.stat) +extra_kwargs = {} +for inputstr in args.statistic_keywords: + try: + key, value = inputstr.split(':') + extra_kwargs[key] = value + except ValueError: + err_txt = "--statistic-keywords must take input in the " \ + "form KWARG1:VALUE1 KWARG2:VALUE2 KWARG3:VALUE3 ... " \ + "Received {}".format(args.statistic_keywords) + raise ValueError(err_txt) + +logging.info("Calculating statistic for %d triggers", len(trigs.snr)) +sds = rank_method.single(trigs) +stat = rank_method.rank_stat_single((args.instrument, sds), **extra_kwargs) +logging.info("Clustering events over %.3fs window", args.cluster_window) +cid = coinc.cluster_over_time(stat, trigs.end_time, + args.cluster_window) +trigs.apply_mask(cid) +stat = stat[cid] +if len(trigs.snr) < num_events: + num_events = len(trigs.snr) + +logging.info("Finding the loudest triggers") +loudest_idx = sorted(numpy.argsort(stat)[::-1][:num_events]) +trigs.apply_mask(loudest_idx) +stat = stat[loudest_idx] times = trigs.end_time tids = trigs.template_id # loop over number of loudest events to be followed up -order = trigs.stat.argsort()[::-1] +order = stat.argsort()[::-1] for rank, num_event in enumerate(order): logging.info('Processing event: %s', num_event) files = wf.FileList([]) time = times[num_event] ifo_time = '%s:%s' %(args.instrument, str(time)) - tid = trigs.mask[num_event] + if isinstance(trigs.mask, numpy.ndarray) and trigs.mask.dtype == bool: + tid = numpy.flatnonzero(trigs.mask)[num_event] + else: + tid = trigs.mask[num_event] ifo_tid = '%s:%s' %(args.instrument, str(tid)) layouts += (mini.make_sngl_ifo(workflow, sngl_file, tmpltbank_file, tid, args.output_dir, args.instrument, + statfiles=statfiles, tags=args.tags + [str(rank)]),) files += mini.make_trigger_timeseries(workflow, [sngl_file], ifo_time, args.output_dir, special_tids=ifo_tid, @@ -217,7 +332,6 @@ for rank, num_event in enumerate(order): args.output_dir, [args.instrument], tags=args.tags + [str(rank)]) - files += mini.make_singles_timefreq(workflow, sngl_file, tmpltbank_file, time, args.output_dir, data_segments=insp_data_seglists, diff --git a/bin/workflows/pycbc_make_offline_search_workflow b/bin/workflows/pycbc_make_offline_search_workflow index 9b3a7da8678..e0995d33cc1 100755 --- a/bin/workflows/pycbc_make_offline_search_workflow +++ b/bin/workflows/pycbc_make_offline_search_workflow @@ 
-188,7 +188,7 @@ insps = wf.merge_single_detector_hdf_files(workflow,
                                            hdfbank,
                                            tags=['full_data'])

 # setup sngl trigger distribution fitting jobs
-# 'statfiles' is list of files used in calculating coinc statistic
+# 'statfiles' is a list of files used in calculating the statistic
 # 'dqfiles' is the subset of files containing data quality information
 statfiles = []
 dqfiles = []
@@ -458,7 +458,8 @@ for insp_file in full_insps:
     wf.setup_single_det_minifollowups\
         (workflow, insp_file, hdfbank, insp_files_seg_file,
          data_analysed_name, trig_generated_name, 'daxes', currdir,
-         veto_file=censored_veto, veto_segment_name='closed_box',
+         statfiles=wf.FileList(statfiles),
+         fg_file=censored_veto, fg_name='closed_box',
          tags=insp_file.tags + [subsec])

 ##################### COINC FULL_DATA plots ###################################
diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py
index 6966e2586ae..4151571e356 100644
--- a/pycbc/events/stat.py
+++ b/pycbc/events/stat.py
@@ -211,6 +211,9 @@ def rank_stat_single(self, single_info,
         """
         Calculate the statistic for a single detector candidate

+        For this statistic, this simply passes through the single
+        value, which is the second entry in the tuple.
+
         Parameters
         ----------
         single_info: tuple
@@ -222,7 +225,7 @@
         numpy.ndarray
             The array of single detector statistics
         """
-        return self.single(single_info[1])
+        return single_info[1]

     def rank_stat_coinc(self, sngls_list, slide, step, to_shift,
                         **kwargs):  # pylint:disable=unused-argument
@@ -663,6 +666,9 @@ def rank_stat_single(self, single_info,
         """
         Calculate the statistic for a single detector candidate

+        For this statistic, this simply passes through the single
+        value, which is the second entry in the tuple.
+
         Parameters
         ----------
         single_info: tuple
@@ -674,7 +680,7 @@
         numpy.ndarray
             The array of single detector statistics
         """
-        return self.single(single_info[1])
+        return single_info[1]

     def rank_stat_coinc(self, sngls_list, slide, step, to_shift,
                         **kwargs):  # pylint:disable=unused-argument
diff --git a/pycbc/io/hdf.py b/pycbc/io/hdf.py
index edac3f26964..2cd0dd1e450 100644
--- a/pycbc/io/hdf.py
+++ b/pycbc/io/hdf.py
@@ -517,7 +517,8 @@ def mask_to_n_loudest_clustered_events(self, rank_method,
            be considered."""

         # If this becomes memory intensive we can optimize
-        stat = rank_method.rank_stat_single((self.ifo, self.trig_dict()))
+        sds = rank_method.single(self.trig_dict())
+        stat = rank_method.rank_stat_single((self.ifo, sds))
         if len(stat) == 0:
             # No triggers, so just return here
             self.stat = np.array([])
diff --git a/pycbc/workflow/minifollowups.py b/pycbc/workflow/minifollowups.py
index 1f16c714e71..5e59533da5d 100644
--- a/pycbc/workflow/minifollowups.py
+++ b/pycbc/workflow/minifollowups.py
@@ -125,7 +125,8 @@ def setup_foreground_minifollowups(workflow, coinc_file, single_triggers,
 def setup_single_det_minifollowups(workflow, single_trig_file, tmpltbank_file,
                                    insp_segs, insp_data_name, insp_anal_name,
                                    dax_output, out_dir, veto_file=None,
-                                   veto_segment_name=None, statfiles=None,
+                                   veto_segment_name=None, fg_file=None,
+                                   fg_name=None, statfiles=None,
                                    tags=None):
     """ Create plots that followup the Nth loudest clustered single detector
     triggers from a merged single detector trigger HDF file.
@@ -192,8 +193,11 @@ def setup_single_det_minifollowups(workflow, single_trig_file, tmpltbank_file, assert(veto_segment_name is not None) node.add_input_opt('--veto-file', veto_file) node.add_opt('--veto-segment-name', veto_segment_name) + if fg_file is not None: + assert(fg_name is not None) + node.add_input_opt('--foreground-censor-file', fg_file) + node.add_opt('--foreground-segment-name', fg_name) if statfiles: - statfiles = statfiles.find_output_with_ifo(curr_ifo) node.add_input_list_opt('--statistic-files', statfiles) if tags: node.add_list_opt('--tags', tags) @@ -555,7 +559,7 @@ def make_coinc_info(workflow, singles, bank, coinc, out_dir, return files def make_sngl_ifo(workflow, sngl_file, bank_file, trigger_id, out_dir, ifo, - title=None, tags=None): + statfiles=None, title=None, tags=None): """Setup a job to create sngl detector sngl ifo html summary snippet. """ tags = [] if tags is None else tags @@ -568,6 +572,8 @@ def make_sngl_ifo(workflow, sngl_file, bank_file, trigger_id, out_dir, ifo, node.add_input_opt('--bank-file', bank_file) node.add_opt('--trigger-id', str(trigger_id)) node.add_opt('--instrument', ifo) + if statfiles is not None: + node.add_input_list_opt('--statistic-files', statfiles) if title is not None: node.add_opt('--title', f'"{title}"') node.new_output_file_opt(workflow.analysis_time, '.html', '--output-file')
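
Reviewer note: the sign convention for --gating-veto-windows matches the check
in pycbc_sngl_minifollowup above: the first value (before gates) must be
non-positive and the second (after gates) non-negative. Below is a minimal
sketch of how a window such as H1:-1,2.5 is applied; the arrays trig_times and
gate_times are illustrative placeholders, not names from the patch:

    # Sketch: flag triggers inside a gating-veto window of 1 s before and
    # 2.5 s after each gate time, as pycbc_sngl_minifollowup now does.
    import numpy
    from pycbc.events import veto

    gveto_before, gveto_after = map(float, "-1,2.5".split(','))
    assert gveto_before <= 0 and gveto_after >= 0  # enforced by the executable

    trig_times = numpy.array([100.0, 101.2, 150.0])  # trigger end times (s)
    gate_times = numpy.array([101.0])                # gate central times (s)

    # Indices of triggers with end time in [gate - 1 s, gate + 2.5 s]
    idx = veto.indices_within_times(trig_times,
                                    gate_times + gveto_before,
                                    gate_times + gveto_after)
    print(idx)  # [0 1]: the trigger at 150.0 survives

These indices feed vetoed_idx, which apply_mask then either keeps
(--vetoed-time-only) or removes (the default).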
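
A second note on the statistic change: rank_stat_single() now expects
precomputed single-detector statistic values rather than raw trigger data,
which is why both the executable and mask_to_n_loudest_clustered_events call
single() first. A minimal sketch of the new two-step pattern, assuming a
SingleDetTriggers instance trigs, parsed options args, and the extra_kwargs
dict built from --statistic-keywords, all as constructed in the executable:

    # Sketch: rank single-detector triggers, cluster in time, keep loudest.
    from pycbc.events import stat, coinc

    rank_method = stat.get_statistic_from_opts(args, [args.instrument])

    # Step 1: reduce raw trigger columns to single-detector statistic values
    sds = rank_method.single(trigs)
    # Step 2: rank the candidates; for quadsum/single_ranking_only this is
    # now a pass-through of step 1 (see the pycbc/events/stat.py hunks)
    ranking = rank_method.rank_stat_single((args.instrument, sds),
                                           **extra_kwargs)

    # Keep only the loudest trigger within each --cluster-window seconds
    keep = coinc.cluster_over_time(ranking, trigs.end_time,
                                   args.cluster_window)
    trigs.apply_mask(keep)
    ranking = ranking[keep]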