From 1c22455bd9bb035964e998a72f2ce2826c7b2209 Mon Sep 17 00:00:00 2001
From: Gareth S Cabourn Davies
Date: Thu, 23 Nov 2023 13:33:00 +0000
Subject: [PATCH] Use gating windows in sngl_minifollowup and allow 'triggers
 within vetoes' option (#4549)

* Use gating windows in sngl_minifollowup

- also allow 'triggers within vetoes' option

* Minor fixes

* add info if fewer triggers than requested

* Allow pycbc_sngl_minifollowup to use single ranking statistic

* Fix bug in rank_stat_single for quadsum/phasetd statistics

* mask may be boolean now

* fix broken pycbc_page_snglinfo due to bugfix

* page_snglinfo to use the ranking statistic used

* missed change
---
 bin/minifollowups/pycbc_page_snglinfo     |   7 +-
 bin/minifollowups/pycbc_sngl_minifollowup | 172 +++++++++++++++---
 .../pycbc_make_offline_search_workflow    |   5 +-
 pycbc/events/stat.py                      |  10 +-
 pycbc/io/hdf.py                           |   3 +-
 pycbc/workflow/minifollowups.py           |  12 +-
 6 files changed, 170 insertions(+), 39 deletions(-)

diff --git a/bin/minifollowups/pycbc_page_snglinfo b/bin/minifollowups/pycbc_page_snglinfo
index e6bc8dc31a4..eb248daecd1 100644
--- a/bin/minifollowups/pycbc_page_snglinfo
+++ b/bin/minifollowups/pycbc_page_snglinfo
@@ -138,8 +138,11 @@ else:

 if args.ranking_statistic in ["quadsum", "single_ranking_only"]:
     stat_name = sngl_stat_name
+    stat_name_long = sngl_stat_name
 else:
-    stat_name = '_with_'.join(ranking_statistic, sngl_ranking)
+    # Name would be too long - just call it ranking statistic
+    stat_name = 'Ranking Statistic'
+    stat_name_long = ' with '.join([args.ranking_statistic, args.sngl_ranking])

 headers.append(stat_name)
@@ -201,7 +204,7 @@ html = pycbc.results.dq.redirect_javascript + \
 if args.n_loudest:
     title = 'Parameters of single-detector event ranked %s' \
         % (args.n_loudest + 1)
-    caption = 'Parameters of the single-detector event ranked number %s by %s. The figures below show the mini-followup data for this event.' % (args.n_loudest + 1, stat_name)
+    caption = 'Parameters of the single-detector event ranked number %s by %s. The figures below show the mini-followup data for this event.' % (args.n_loudest + 1, stat_name_long)
 else:
     title = 'Parameters of single-detector event'
     caption = 'Parameters of the single-detector event. The figures below show the mini-followup data for this event.'
diff --git a/bin/minifollowups/pycbc_sngl_minifollowup b/bin/minifollowups/pycbc_sngl_minifollowup
index bd3ae4bed1a..01fbc743e83 100644
--- a/bin/minifollowups/pycbc_sngl_minifollowup
+++ b/bin/minifollowups/pycbc_sngl_minifollowup
@@ -14,29 +14,24 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-""" Followup foreground events
+"""
+Follow up single-detector triggers which do not contribute to foreground events
 """
 import os, argparse, logging
 import numpy
 from ligo.lw import table
 from ligo.lw import utils as ligolw_utils
 from pycbc.results import layout
+from pycbc.types.optparse import MultiDetOptionAction
 from pycbc.events import select_segments_by_definer
 import pycbc.workflow.minifollowups as mini
 import pycbc.version
 import pycbc.workflow as wf
 import pycbc.events
 from pycbc.workflow.core import resolve_url_to_file
-from pycbc.events import stat
+from pycbc.events import stat, veto, coinc
 from pycbc.io import hdf

-def add_wiki_row(outfile, cols):
-    """
-    Adds a wiki-formatted row to an output file from a list or a numpy array.
- """ - with open(outfile, 'a') as f: - f.write('||%s||\n' % '||'.join(map(str,cols))) - parser = argparse.ArgumentParser(description=__doc__[1:]) parser.add_argument('--version', action='version', version=pycbc.version.git_verbose_msg) parser.add_argument('--bank-file', @@ -44,11 +39,23 @@ parser.add_argument('--bank-file', parser.add_argument('--single-detector-file', help="HDF format merged single detector trigger files") parser.add_argument('--instrument', help="Name of interferometer e.g. H1") +parser.add_argument('--foreground-censor-file', + help="The censor file to be used if vetoing triggers " + "in the foreground of the search (optional).") +parser.add_argument('--foreground-segment-name', + help="If using foreground censor file must also provide " + "the name of the segment to use as a veto.") parser.add_argument('--veto-file', - help="The veto file to be used if vetoing triggers (optional).") + help="The veto file to be used if vetoing triggers " + "(optional).") parser.add_argument('--veto-segment-name', - help="If using veto file must also provide the name of the segment to use " - "as a veto.") + help="If using veto file must also provide the name of " + "the segment to use as a veto.") +parser.add_argument("--gating-veto-windows", nargs='+', + action=MultiDetOptionAction, + help="Seconds to be vetoed before and after the central time " + "of each gate. Given as detector-values pairs, e.g. " + "H1:-1,2.5 L1:-1,2.5 V1:0,0") parser.add_argument('--inspiral-segments', help="xml segment file containing the inspiral analysis " "times") @@ -60,17 +67,22 @@ parser.add_argument('--inspiral-data-analyzed-name', "analyzed by each analysis job.") parser.add_argument('--min-snr', type=float, default=6.5, help="Minimum SNR to consider for loudest triggers") -parser.add_argument('--non-coinc-time-only', default=False, - action='store_true', +parser.add_argument('--non-coinc-time-only', action='store_true', help="If given remove (veto) single-detector triggers " "that occur during a time when at least one other " "instrument is taking science data.") +parser.add_argument('--vetoed-time-only', action='store_true', + help="If given, only report on single-detector triggers " + "that occur during vetoed times.") parser.add_argument('--minimum-duration', default=None, type=float, help="If given only consider single-detector triggers " "with template duration larger than this.") parser.add_argument('--maximum-duration', default=None, type=float, help="If given only consider single-detector triggers " "with template duration smaller than this.") +parser.add_argument('--cluster-window', type=float, default=10, + help="Window (seconds) over which to cluster triggers " + "when finding the loudest-ranked. 
Default=10") wf.add_workflow_command_line_group(parser) wf.add_workflow_settings_cli(parser, include_subdax_opts=True) stat.insert_statistic_option_group(parser, @@ -95,6 +107,12 @@ sngl_file = resolve_url_to_file( attrs={'ifos': args.instrument} ) +# Flatten the statistic_files option: +statfiles = [] +for f in sum(args.statistic_files, []): + statfiles.append(resolve_url_to_file(os.path.abspath(f))) +statfiles = wf.FileList(statfiles) if statfiles is not [] else None + if args.veto_file is not None: veto_file = resolve_url_to_file( os.path.abspath(args.veto_file), @@ -102,6 +120,7 @@ if args.veto_file is not None: ) else: veto_file = None + insp_segs = resolve_url_to_file(os.path.abspath(args.inspiral_segments)) insp_data_seglists = select_segments_by_definer\ (args.inspiral_segments, segment_name=args.inspiral_data_read_name, @@ -112,20 +131,89 @@ num_events = int(workflow.cp.get_opt_tags('workflow-sngl_minifollowups', 'num-sngl-events', '')) # This helps speed up the processing to ignore a large fraction of triggers -snr_mask = None +mask = None +f = hdf.HFile(args.single_detector_file, 'r') +n_triggers = f['{}/snr'.format(args.instrument)].size +logging.info("%i triggers in file", n_triggers) if args.min_snr: logging.info('Calculating Prefilter') - f = hdf.HFile(args.single_detector_file, 'r') idx, _ = f.select(lambda snr: snr > args.min_snr, '{}/snr'.format(args.instrument), return_indices=True) - snr_mask = numpy.zeros(len(f['{}/snr'.format(args.instrument)]), - dtype=bool) - snr_mask[idx] = True + mask = numpy.zeros(n_triggers, dtype=bool) + mask[idx] = True + if len(idx) < num_events: + logging.info("Fewer triggers exist after the --min-snr cut (%d) " + "than requested for the minifollowup (%d)", + len(idx), num_events) -trigs = hdf.SingleDetTriggers(args.single_detector_file, args.bank_file, - args.veto_file, args.veto_segment_name, - None, args.instrument, premask=snr_mask) +trigs = hdf.SingleDetTriggers( + args.single_detector_file, + args.bank_file, + args.foreground_censor_file, + args.foreground_segment_name, + None, + args.instrument, + premask=mask +) + +# Include gating vetoes +if args.gating_veto_windows: + logging.info("Getting gating vetoes") + gating_veto = args.gating_veto_windows[args.instrument].split(',') + gveto_before = float(gating_veto[0]) + gveto_after = float(gating_veto[1]) + if gveto_before > 0 or gveto_after < 0: + raise ValueError("Gating veto window values must be negative before " + "gates and positive after gates.") + if not (gveto_before == 0 and gveto_after == 0): + gate_group = f[args.instrument + '/gating/'] + autogate_times = numpy.unique(gate_group['auto/time'][:]) + if 'file' in gate_group: + detgate_times = gate_group['file/time'][:] + else: + detgate_times = [] + gate_times = numpy.concatenate((autogate_times, detgate_times)) + gveto_idx = veto.indices_within_times( + trigs.end_time, + gate_times + gveto_before, + gate_times + gveto_after + ) + logging.info('%i triggers in gating vetoes', gveto_idx.size) +else: + gveto_idx = numpy.array([], dtype=numpy.uint64) + +if args.veto_file: + logging.info('Getting file vetoes') + # veto_mask is an array of indices into the trigger arrays + # giving the surviving triggers + veto_file_idx, _ = events.veto.indices_within_segments( + trigs.end_time, + [args.veto_file], + ifo=args.instrument, + segment_name=args.veto_segment_name + ) + + logging.info('%i triggers in file-vetoed segments', + veto_file_idx.size) +else: + veto_file_idx = numpy.array([], dtype=numpy.uint64) + +# Work out indices we are going 
to keep / remove +vetoed_idx = numpy.unique(numpy.concatenate((veto_file_idx, gveto_idx))) +# Needs to be in ascending order +vetoed_idx = numpy.sort(vetoed_idx).astype(numpy.uint64) + +if args.vetoed_time_only and vetoed_idx.size > 0: + logging.info("Applying mask to keep only triggers within vetoed time") + trigs.apply_mask(vetoed_idx) +elif vetoed_idx.size > 0: + logging.info("Applying mask to keep only triggers outwith vetoed time") + veto_mask = numpy.ones(trigs.end_time.size, dtype=bool) + veto_mask[vetoed_idx] = False + trigs.apply_mask(veto_mask) +elif args.vetoed_time_only and vetoed_idx.size == 0: + logging.warning("No triggers exist inside vetoed times") if args.non_coinc_time_only: from pycbc.io.ligolw import LIGOLWContentHandler as h @@ -158,29 +246,56 @@ if args.maximum_duration is not None: trigs.apply_mask(lgc_mask) logging.info('remaining triggers: %s', trigs.mask.sum()) -logging.info('finding loudest clustered events') +logging.info('Finding loudest clustered events') rank_method = stat.get_statistic_from_opts(args, [args.instrument]) -trigs.mask_to_n_loudest_clustered_events(rank_method, n_loudest=num_events) -if len(trigs.stat) < num_events: - num_events = len(trigs.stat) +extra_kwargs = {} +for inputstr in args.statistic_keywords: + try: + key, value = inputstr.split(':') + extra_kwargs[key] = value + except ValueError: + err_txt = "--statistic-keywords must take input in the " \ + "form KWARG1:VALUE1 KWARG2:VALUE2 KWARG3:VALUE3 ... " \ + "Received {}".format(args.statistic_keywords) + raise ValueError(err_txt) + +logging.info("Calculating statistic for %d triggers", len(trigs.snr)) +sds = rank_method.single(trigs) +stat = rank_method.rank_stat_single((args.instrument, sds), **extra_kwargs) +logging.info("Clustering events over %.3fs window", args.cluster_window) +cid = coinc.cluster_over_time(stat, trigs.end_time, + args.cluster_window) +trigs.apply_mask(cid) +stat = stat[cid] +if len(trigs.snr) < num_events: + num_events = len(trigs.snr) + +logging.info("Finding the loudest triggers") +loudest_idx = sorted(numpy.argsort(stat)[::-1][:num_events]) +trigs.apply_mask(loudest_idx) +stat = stat[loudest_idx] times = trigs.end_time tids = trigs.template_id # loop over number of loudest events to be followed up -order = trigs.stat.argsort()[::-1] +order = stat.argsort()[::-1] for rank, num_event in enumerate(order): logging.info('Processing event: %s', num_event) files = wf.FileList([]) time = times[num_event] ifo_time = '%s:%s' %(args.instrument, str(time)) - tid = trigs.mask[num_event] + if isinstance(trigs.mask, numpy.ndarray) and trigs.mask.dtype == bool: + tid = numpy.flatnonzero(trigs.mask)[num_event] + else: + tid = trigs.mask[num_event] ifo_tid = '%s:%s' %(args.instrument, str(tid)) layouts += (mini.make_sngl_ifo(workflow, sngl_file, tmpltbank_file, tid, args.output_dir, args.instrument, + statfiles=statfiles, tags=args.tags + [str(rank)]),) files += mini.make_trigger_timeseries(workflow, [sngl_file], ifo_time, args.output_dir, special_tids=ifo_tid, @@ -217,7 +332,6 @@ for rank, num_event in enumerate(order): args.output_dir, [args.instrument], tags=args.tags + [str(rank)]) - files += mini.make_singles_timefreq(workflow, sngl_file, tmpltbank_file, time, args.output_dir, data_segments=insp_data_seglists, diff --git a/bin/workflows/pycbc_make_offline_search_workflow b/bin/workflows/pycbc_make_offline_search_workflow index 9b3a7da8678..e0995d33cc1 100755 --- a/bin/workflows/pycbc_make_offline_search_workflow +++ b/bin/workflows/pycbc_make_offline_search_workflow @@ 
-188,7 +188,7 @@ insps = wf.merge_single_detector_hdf_files(workflow,
                                            hdfbank,
                                            tags=['full_data'])

 # setup sngl trigger distribution fitting jobs
-# 'statfiles' is list of files used in calculating coinc statistic
+# 'statfiles' is a list of files used in calculating the statistic
 # 'dqfiles' is the subset of files containing data quality information
 statfiles = []
 dqfiles = []
@@ -458,7 +458,8 @@ for insp_file in full_insps:
     wf.setup_single_det_minifollowups\
         (workflow, insp_file, hdfbank, insp_files_seg_file,
          data_analysed_name, trig_generated_name, 'daxes', currdir,
-         veto_file=censored_veto, veto_segment_name='closed_box',
+         statfiles=wf.FileList(statfiles),
+         fg_file=censored_veto, fg_name='closed_box',
          tags=insp_file.tags + [subsec])

 ##################### COINC FULL_DATA plots ###################################
diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py
index 6966e2586ae..4151571e356 100644
--- a/pycbc/events/stat.py
+++ b/pycbc/events/stat.py
@@ -211,6 +211,9 @@ def rank_stat_single(self, single_info,
         """
         Calculate the statistic for a single detector candidate

+        For this statistic, this simply passes through the single
+        value, which is the second entry in the tuple.
+
         Parameters
         ----------
         single_info: tuple
@@ -222,7 +225,7 @@
         numpy.ndarray
             The array of single detector statistics
         """
-        return self.single(single_info[1])
+        return single_info[1]

     def rank_stat_coinc(self, sngls_list, slide, step, to_shift,
                         **kwargs):  # pylint:disable=unused-argument
@@ -663,6 +666,9 @@ def rank_stat_single(self, single_info,
         """
         Calculate the statistic for a single detector candidate

+        For this statistic, this simply passes through the single
+        value, which is the second entry in the tuple.
+
         Parameters
         ----------
         single_info: tuple
@@ -674,7 +680,7 @@
         numpy.ndarray
             The array of single detector statistics
         """
-        return self.single(single_info[1])
+        return single_info[1]

     def rank_stat_coinc(self, sngls_list, slide, step, to_shift,
                         **kwargs):  # pylint:disable=unused-argument
diff --git a/pycbc/io/hdf.py b/pycbc/io/hdf.py
index edac3f26964..2cd0dd1e450 100644
--- a/pycbc/io/hdf.py
+++ b/pycbc/io/hdf.py
@@ -517,7 +517,8 @@ def mask_to_n_loudest_clustered_events(self, rank_method,
            be considered."""

         # If this becomes memory intensive we can optimize
-        stat = rank_method.rank_stat_single((self.ifo, self.trig_dict()))
+        sds = rank_method.single(self.trig_dict())
+        stat = rank_method.rank_stat_single((self.ifo, sds))
         if len(stat) == 0:
             # No triggers, so just return here
             self.stat = np.array([])
diff --git a/pycbc/workflow/minifollowups.py b/pycbc/workflow/minifollowups.py
index 1f16c714e71..5e59533da5d 100644
--- a/pycbc/workflow/minifollowups.py
+++ b/pycbc/workflow/minifollowups.py
@@ -125,7 +125,8 @@ def setup_foreground_minifollowups(workflow, coinc_file, single_triggers,
 def setup_single_det_minifollowups(workflow, single_trig_file, tmpltbank_file,
                                    insp_segs, insp_data_name, insp_anal_name,
                                    dax_output, out_dir, veto_file=None,
-                                   veto_segment_name=None, statfiles=None,
+                                   veto_segment_name=None, fg_file=None,
+                                   fg_name=None, statfiles=None,
                                    tags=None):
     """ Create plots that followup the Nth loudest clustered single detector
     triggers from a merged single detector trigger HDF file.
@@ -192,8 +193,11 @@ def setup_single_det_minifollowups(workflow, single_trig_file, tmpltbank_file, assert(veto_segment_name is not None) node.add_input_opt('--veto-file', veto_file) node.add_opt('--veto-segment-name', veto_segment_name) + if fg_file is not None: + assert(fg_name is not None) + node.add_input_opt('--foreground-censor-file', fg_file) + node.add_opt('--foreground-segment-name', fg_name) if statfiles: - statfiles = statfiles.find_output_with_ifo(curr_ifo) node.add_input_list_opt('--statistic-files', statfiles) if tags: node.add_list_opt('--tags', tags) @@ -555,7 +559,7 @@ def make_coinc_info(workflow, singles, bank, coinc, out_dir, return files def make_sngl_ifo(workflow, sngl_file, bank_file, trigger_id, out_dir, ifo, - title=None, tags=None): + statfiles=None, title=None, tags=None): """Setup a job to create sngl detector sngl ifo html summary snippet. """ tags = [] if tags is None else tags @@ -568,6 +572,8 @@ def make_sngl_ifo(workflow, sngl_file, bank_file, trigger_id, out_dir, ifo, node.add_input_opt('--bank-file', bank_file) node.add_opt('--trigger-id', str(trigger_id)) node.add_opt('--instrument', ifo) + if statfiles is not None: + node.add_input_list_opt('--statistic-files', statfiles) if title is not None: node.add_opt('--title', f'"{title}"') node.new_output_file_opt(workflow.analysis_time, '.html', '--output-file')
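
Reviewer note: the sign convention for --gating-veto-windows matches the check
in pycbc_sngl_minifollowup above: the first value (before gates) must be
non-positive and the second (after gates) non-negative. Below is a minimal
sketch of how a window such as H1:-1,2.5 is applied; the arrays trig_times and
gate_times are illustrative placeholders, not names from the patch:

    # Sketch: flag triggers inside a gating-veto window of 1 s before and
    # 2.5 s after each gate time, as pycbc_sngl_minifollowup now does.
    import numpy
    from pycbc.events import veto

    gveto_before, gveto_after = map(float, "-1,2.5".split(','))
    assert gveto_before <= 0 and gveto_after >= 0  # enforced by the executable

    trig_times = numpy.array([100.0, 101.2, 150.0])  # trigger end times (s)
    gate_times = numpy.array([101.0])                # gate central times (s)

    # Indices of triggers with end time in [gate - 1 s, gate + 2.5 s]
    idx = veto.indices_within_times(trig_times,
                                    gate_times + gveto_before,
                                    gate_times + gveto_after)
    print(idx)  # [0 1]: the trigger at 150.0 survives

These indices feed vetoed_idx, which apply_mask then either keeps
(--vetoed-time-only) or removes (the default).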
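
A second note on the statistic change: rank_stat_single() now expects
precomputed single-detector statistic values rather than raw trigger data,
which is why both the executable and mask_to_n_loudest_clustered_events call
single() first. A minimal sketch of the new two-step pattern, assuming a
SingleDetTriggers instance trigs, parsed options args, and the extra_kwargs
dict built from --statistic-keywords, all as constructed in the executable:

    # Sketch: rank single-detector triggers, cluster in time, keep loudest.
    from pycbc.events import stat, coinc

    rank_method = stat.get_statistic_from_opts(args, [args.instrument])

    # Step 1: reduce raw trigger columns to single-detector statistic values
    sds = rank_method.single(trigs)
    # Step 2: rank the candidates; for quadsum/single_ranking_only this is
    # now a pass-through of step 1 (see the pycbc/events/stat.py hunks)
    ranking = rank_method.rank_stat_single((args.instrument, sds),
                                           **extra_kwargs)

    # Keep only the loudest trigger within each --cluster-window seconds
    keep = coinc.cluster_over_time(ranking, trigs.end_time,
                                   args.cluster_window)
    trigs.apply_mask(keep)
    ranking = ranking[keep]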