Skip to content

Commit

Permalink
Use gating windows in sngl_minifollowup and allow 'triggers within ve…
Browse files Browse the repository at this point in the history
…toes' option (gwastro#4549)

* Use gating windows in sngl_minifollowup - also allow 'triggers within vetoes' option

* Minor fixes

* add infor if fewer triggers than requested

* Allow pycbc_sngls_minifollowup to use single ranking statistic

* Fix bug in rank_stat_single for quadsum/phasetd statistics

* mask may be boolean now

* fix broken pycbc_page_snglinfo due to bugfix

* page_snglinfo to use the ranking statistic used

* missed change
  • Loading branch information
GarethCabournDavies authored and bhooshan-gadre committed Dec 19, 2023
1 parent da9f23a commit 1c22455
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 39 deletions.
7 changes: 5 additions & 2 deletions bin/minifollowups/pycbc_page_snglinfo
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,11 @@ else:

if args.ranking_statistic in ["quadsum", "single_ranking_only"]:
stat_name = sngl_stat_name
stat_name_long = sngl_stat_name
else:
stat_name = '_with_'.join(ranking_statistic, sngl_ranking)
# Name would be too long - just call it ranking statistic
stat_name = 'Ranking Statistic'
stat_name_long = ' with '.join([args.ranking_statistic, args.sngl_ranking])

headers.append(stat_name)

Expand Down Expand Up @@ -201,7 +204,7 @@ html = pycbc.results.dq.redirect_javascript + \
if args.n_loudest:
title = 'Parameters of single-detector event ranked %s' \
% (args.n_loudest + 1)
caption = 'Parameters of the single-detector event ranked number %s by %s. The figures below show the mini-followup data for this event.' % (args.n_loudest + 1, stat_name)
caption = 'Parameters of the single-detector event ranked number %s by %s. The figures below show the mini-followup data for this event.' % (args.n_loudest + 1, stat_name_long)
else:
title = 'Parameters of single-detector event'
caption = 'Parameters of the single-detector event. The figures below show the mini-followup data for this event.'
Expand Down
172 changes: 143 additions & 29 deletions bin/minifollowups/pycbc_sngl_minifollowup
Original file line number Diff line number Diff line change
Expand Up @@ -14,41 +14,48 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
""" Followup foreground events
"""
Followup single-detector triggers which do not contribute to foreground events
"""
import os, argparse, logging
import numpy
from ligo.lw import table
from ligo.lw import utils as ligolw_utils
from pycbc.results import layout
from pycbc.types.optparse import MultiDetOptionAction
from pycbc.events import select_segments_by_definer
import pycbc.workflow.minifollowups as mini
import pycbc.version
import pycbc.workflow as wf
import pycbc.events
from pycbc.workflow.core import resolve_url_to_file
from pycbc.events import stat
from pycbc.events import stat, veto, coinc
from pycbc.io import hdf

def add_wiki_row(outfile, cols):
"""
Adds a wiki-formatted row to an output file from a list or a numpy array.
"""
with open(outfile, 'a') as f:
f.write('||%s||\n' % '||'.join(map(str,cols)))

parser = argparse.ArgumentParser(description=__doc__[1:])
parser.add_argument('--version', action='version', version=pycbc.version.git_verbose_msg)
parser.add_argument('--bank-file',
help="HDF format template bank file")
parser.add_argument('--single-detector-file',
help="HDF format merged single detector trigger files")
parser.add_argument('--instrument', help="Name of interferometer e.g. H1")
parser.add_argument('--foreground-censor-file',
help="The censor file to be used if vetoing triggers "
"in the foreground of the search (optional).")
parser.add_argument('--foreground-segment-name',
help="If using foreground censor file must also provide "
"the name of the segment to use as a veto.")
parser.add_argument('--veto-file',
help="The veto file to be used if vetoing triggers (optional).")
help="The veto file to be used if vetoing triggers "
"(optional).")
parser.add_argument('--veto-segment-name',
help="If using veto file must also provide the name of the segment to use "
"as a veto.")
help="If using veto file must also provide the name of "
"the segment to use as a veto.")
parser.add_argument("--gating-veto-windows", nargs='+',
action=MultiDetOptionAction,
help="Seconds to be vetoed before and after the central time "
"of each gate. Given as detector-values pairs, e.g. "
"H1:-1,2.5 L1:-1,2.5 V1:0,0")
parser.add_argument('--inspiral-segments',
help="xml segment file containing the inspiral analysis "
"times")
Expand All @@ -60,17 +67,22 @@ parser.add_argument('--inspiral-data-analyzed-name',
"analyzed by each analysis job.")
parser.add_argument('--min-snr', type=float, default=6.5,
help="Minimum SNR to consider for loudest triggers")
parser.add_argument('--non-coinc-time-only', default=False,
action='store_true',
parser.add_argument('--non-coinc-time-only', action='store_true',
help="If given remove (veto) single-detector triggers "
"that occur during a time when at least one other "
"instrument is taking science data.")
parser.add_argument('--vetoed-time-only', action='store_true',
help="If given, only report on single-detector triggers "
"that occur during vetoed times.")
parser.add_argument('--minimum-duration', default=None, type=float,
help="If given only consider single-detector triggers "
"with template duration larger than this.")
parser.add_argument('--maximum-duration', default=None, type=float,
help="If given only consider single-detector triggers "
"with template duration smaller than this.")
parser.add_argument('--cluster-window', type=float, default=10,
help="Window (seconds) over which to cluster triggers "
"when finding the loudest-ranked. Default=10")
wf.add_workflow_command_line_group(parser)
wf.add_workflow_settings_cli(parser, include_subdax_opts=True)
stat.insert_statistic_option_group(parser,
Expand All @@ -95,13 +107,20 @@ sngl_file = resolve_url_to_file(
attrs={'ifos': args.instrument}
)

# Flatten the statistic_files option:
statfiles = []
for f in sum(args.statistic_files, []):
statfiles.append(resolve_url_to_file(os.path.abspath(f)))
statfiles = wf.FileList(statfiles) if statfiles is not [] else None

if args.veto_file is not None:
veto_file = resolve_url_to_file(
os.path.abspath(args.veto_file),
attrs={'ifos': args.instrument}
)
else:
veto_file = None

insp_segs = resolve_url_to_file(os.path.abspath(args.inspiral_segments))
insp_data_seglists = select_segments_by_definer\
(args.inspiral_segments, segment_name=args.inspiral_data_read_name,
Expand All @@ -112,20 +131,89 @@ num_events = int(workflow.cp.get_opt_tags('workflow-sngl_minifollowups',
'num-sngl-events', ''))

# This helps speed up the processing to ignore a large fraction of triggers
snr_mask = None
mask = None
f = hdf.HFile(args.single_detector_file, 'r')
n_triggers = f['{}/snr'.format(args.instrument)].size
logging.info("%i triggers in file", n_triggers)
if args.min_snr:
logging.info('Calculating Prefilter')
f = hdf.HFile(args.single_detector_file, 'r')
idx, _ = f.select(lambda snr: snr > args.min_snr,
'{}/snr'.format(args.instrument),
return_indices=True)
snr_mask = numpy.zeros(len(f['{}/snr'.format(args.instrument)]),
dtype=bool)
snr_mask[idx] = True
mask = numpy.zeros(n_triggers, dtype=bool)
mask[idx] = True
if len(idx) < num_events:
logging.info("Fewer triggers exist after the --min-snr cut (%d) "
"than requested for the minifollowup (%d)",
len(idx), num_events)

trigs = hdf.SingleDetTriggers(args.single_detector_file, args.bank_file,
args.veto_file, args.veto_segment_name,
None, args.instrument, premask=snr_mask)
trigs = hdf.SingleDetTriggers(
args.single_detector_file,
args.bank_file,
args.foreground_censor_file,
args.foreground_segment_name,
None,
args.instrument,
premask=mask
)

# Include gating vetoes
if args.gating_veto_windows:
logging.info("Getting gating vetoes")
gating_veto = args.gating_veto_windows[args.instrument].split(',')
gveto_before = float(gating_veto[0])
gveto_after = float(gating_veto[1])
if gveto_before > 0 or gveto_after < 0:
raise ValueError("Gating veto window values must be negative before "
"gates and positive after gates.")
if not (gveto_before == 0 and gveto_after == 0):
gate_group = f[args.instrument + '/gating/']
autogate_times = numpy.unique(gate_group['auto/time'][:])
if 'file' in gate_group:
detgate_times = gate_group['file/time'][:]
else:
detgate_times = []
gate_times = numpy.concatenate((autogate_times, detgate_times))
gveto_idx = veto.indices_within_times(
trigs.end_time,
gate_times + gveto_before,
gate_times + gveto_after
)
logging.info('%i triggers in gating vetoes', gveto_idx.size)
else:
gveto_idx = numpy.array([], dtype=numpy.uint64)

if args.veto_file:
logging.info('Getting file vetoes')
# veto_mask is an array of indices into the trigger arrays
# giving the surviving triggers
veto_file_idx, _ = events.veto.indices_within_segments(
trigs.end_time,
[args.veto_file],
ifo=args.instrument,
segment_name=args.veto_segment_name
)

logging.info('%i triggers in file-vetoed segments',
veto_file_idx.size)
else:
veto_file_idx = numpy.array([], dtype=numpy.uint64)

# Work out indices we are going to keep / remove
vetoed_idx = numpy.unique(numpy.concatenate((veto_file_idx, gveto_idx)))
# Needs to be in ascending order
vetoed_idx = numpy.sort(vetoed_idx).astype(numpy.uint64)

if args.vetoed_time_only and vetoed_idx.size > 0:
logging.info("Applying mask to keep only triggers within vetoed time")
trigs.apply_mask(vetoed_idx)
elif vetoed_idx.size > 0:
logging.info("Applying mask to keep only triggers outwith vetoed time")
veto_mask = numpy.ones(trigs.end_time.size, dtype=bool)
veto_mask[vetoed_idx] = False
trigs.apply_mask(veto_mask)
elif args.vetoed_time_only and vetoed_idx.size == 0:
logging.warning("No triggers exist inside vetoed times")

if args.non_coinc_time_only:
from pycbc.io.ligolw import LIGOLWContentHandler as h
Expand Down Expand Up @@ -158,29 +246,56 @@ if args.maximum_duration is not None:
trigs.apply_mask(lgc_mask)
logging.info('remaining triggers: %s', trigs.mask.sum())

logging.info('finding loudest clustered events')
logging.info('Finding loudest clustered events')
rank_method = stat.get_statistic_from_opts(args, [args.instrument])
trigs.mask_to_n_loudest_clustered_events(rank_method, n_loudest=num_events)

if len(trigs.stat) < num_events:
num_events = len(trigs.stat)
extra_kwargs = {}
for inputstr in args.statistic_keywords:
try:
key, value = inputstr.split(':')
extra_kwargs[key] = value
except ValueError:
err_txt = "--statistic-keywords must take input in the " \
"form KWARG1:VALUE1 KWARG2:VALUE2 KWARG3:VALUE3 ... " \
"Received {}".format(args.statistic_keywords)
raise ValueError(err_txt)

logging.info("Calculating statistic for %d triggers", len(trigs.snr))
sds = rank_method.single(trigs)
stat = rank_method.rank_stat_single((args.instrument, sds), **extra_kwargs)
logging.info("Clustering events over %.3fs window", args.cluster_window)
cid = coinc.cluster_over_time(stat, trigs.end_time,
args.cluster_window)
trigs.apply_mask(cid)
stat = stat[cid]
if len(trigs.snr) < num_events:
num_events = len(trigs.snr)

logging.info("Finding the loudest triggers")
loudest_idx = sorted(numpy.argsort(stat)[::-1][:num_events])
trigs.apply_mask(loudest_idx)
stat = stat[loudest_idx]

times = trigs.end_time
tids = trigs.template_id

# loop over number of loudest events to be followed up
order = trigs.stat.argsort()[::-1]
order = stat.argsort()[::-1]
for rank, num_event in enumerate(order):
logging.info('Processing event: %s', num_event)

files = wf.FileList([])
time = times[num_event]
ifo_time = '%s:%s' %(args.instrument, str(time))
tid = trigs.mask[num_event]
if isinstance(trigs.mask, numpy.ndarray) and trigs.mask.dtype == bool:
tid = numpy.flatnonzero(trigs.mask)[num_event]
else:
tid = trigs.mask[num_event]
ifo_tid = '%s:%s' %(args.instrument, str(tid))

layouts += (mini.make_sngl_ifo(workflow, sngl_file, tmpltbank_file,
tid, args.output_dir, args.instrument,
statfiles=statfiles,
tags=args.tags + [str(rank)]),)
files += mini.make_trigger_timeseries(workflow, [sngl_file],
ifo_time, args.output_dir, special_tids=ifo_tid,
Expand Down Expand Up @@ -217,7 +332,6 @@ for rank, num_event in enumerate(order):
args.output_dir, [args.instrument],
tags=args.tags + [str(rank)])


files += mini.make_singles_timefreq(workflow, sngl_file, tmpltbank_file,
time, args.output_dir,
data_segments=insp_data_seglists,
Expand Down
5 changes: 3 additions & 2 deletions bin/workflows/pycbc_make_offline_search_workflow
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ insps = wf.merge_single_detector_hdf_files(workflow, hdfbank,
tags=['full_data'])

# setup sngl trigger distribution fitting jobs
# 'statfiles' is list of files used in calculating coinc statistic
# 'statfiles' is list of files used in calculating statistic
# 'dqfiles' is the subset of files containing data quality information
statfiles = []
dqfiles = []
Expand Down Expand Up @@ -458,7 +458,8 @@ for insp_file in full_insps:
wf.setup_single_det_minifollowups\
(workflow, insp_file, hdfbank, insp_files_seg_file,
data_analysed_name, trig_generated_name, 'daxes', currdir,
veto_file=censored_veto, veto_segment_name='closed_box',
statfiles=wf.FileList(statfiles),
fg_file=censored_veto, fg_name='closed_box',
tags=insp_file.tags + [subsec])

##################### COINC FULL_DATA plots ###################################
Expand Down
10 changes: 8 additions & 2 deletions pycbc/events/stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,9 @@ def rank_stat_single(self, single_info,
"""
Calculate the statistic for a single detector candidate
For this statistic this is just passing through the
single value, which will be the second entry in the tuple.
Parameters
----------
single_info: tuple
Expand All @@ -222,7 +225,7 @@ def rank_stat_single(self, single_info,
numpy.ndarray
The array of single detector statistics
"""
return self.single(single_info[1])
return single_info[1]

def rank_stat_coinc(self, sngls_list, slide, step, to_shift,
**kwargs): # pylint:disable=unused-argument
Expand Down Expand Up @@ -663,6 +666,9 @@ def rank_stat_single(self, single_info,
"""
Calculate the statistic for a single detector candidate
For this statistic this is just passing through the
single value, which will be the second entry in the tuple.
Parameters
----------
single_info: tuple
Expand All @@ -674,7 +680,7 @@ def rank_stat_single(self, single_info,
numpy.ndarray
The array of single detector statistics
"""
return self.single(single_info[1])
return single_info[1]

def rank_stat_coinc(self, sngls_list, slide, step, to_shift,
**kwargs): # pylint:disable=unused-argument
Expand Down
3 changes: 2 additions & 1 deletion pycbc/io/hdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,8 @@ def mask_to_n_loudest_clustered_events(self, rank_method,
be considered."""

# If this becomes memory intensive we can optimize
stat = rank_method.rank_stat_single((self.ifo, self.trig_dict()))
sds = rank_method.single(self.trig_dict())
stat = rank_method.rank_stat_single((self.ifo, sds))
if len(stat) == 0:
# No triggers, so just return here
self.stat = np.array([])
Expand Down
Loading

0 comments on commit 1c22455

Please sign in to comment.