Skip to content

Commit

Permalink
Fix sngls_minifollowup event selection/ordering [master] (gwastro#4823)
Browse files Browse the repository at this point in the history
* Fix sngls_minifollowup event selection/ordering [master]

* Remove testing printing

* Still create stat class variable even when there are no triggers

* Ranking -> statistic rename

* minor bugs caught duiring review

* oops, accidentally added to this PR
  • Loading branch information
GarethCabournDavies authored Jul 29, 2024
1 parent ba7b9c0 commit e4a9db6
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 45 deletions.
57 changes: 21 additions & 36 deletions bin/minifollowups/pycbc_sngl_minifollowup
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import pycbc.workflow.minifollowups as mini
import pycbc.workflow as wf
import pycbc.events
from pycbc.workflow.core import resolve_url_to_file
from pycbc.events import stat, veto, coinc
from pycbc.events import stat, veto
from pycbc.io import hdf

parser = argparse.ArgumentParser(description=__doc__[1:])
Expand Down Expand Up @@ -71,7 +71,8 @@ parser.add_argument('--inspiral-data-analyzed-name',
"analyzed by each analysis job.")
parser.add_argument('--min-sngl-ranking', type=float, default=6.5,
help="Minimum sngl-ranking to consider for loudest "
"triggers. Default=6.5.")
"triggers. Useful for efficiency savings. "
"Default=6.5.")
parser.add_argument('--non-coinc-time-only', action='store_true',
help="If given remove (veto) single-detector triggers "
"that occur during a time when at least one other "
Expand Down Expand Up @@ -175,7 +176,7 @@ if args.veto_file:
logging.info('Getting file vetoes')
# veto_mask is an array of indices into the trigger arrays
# giving the surviving triggers
veto_file_idx, _ = events.veto.indices_within_segments(
veto_file_idx, _ = pycbc.events.veto.indices_within_segments(
trigs.end_time,
[args.veto_file],
ifo=args.instrument,
Expand Down Expand Up @@ -237,48 +238,32 @@ if args.maximum_duration is not None:
logging.info('Finding loudest clustered events')
rank_method = stat.get_statistic_from_opts(args, [args.instrument])

extra_kwargs = {}
for inputstr in args.statistic_keywords:
try:
key, value = inputstr.split(':')
extra_kwargs[key] = value
except ValueError:
err_txt = "--statistic-keywords must take input in the " \
"form KWARG1:VALUE1 KWARG2:VALUE2 KWARG3:VALUE3 ... " \
"Received {}".format(args.statistic_keywords)
raise ValueError(err_txt)

logging.info("Calculating statistic for %d triggers", len(trigs.snr))
sds = rank_method.single(trigs)
stat = rank_method.rank_stat_single((args.instrument, sds), **extra_kwargs)
logging.info("Clustering events over %.3fs window", args.cluster_window)
cid = coinc.cluster_over_time(stat, trigs.end_time,
args.cluster_window)
trigs.apply_mask(cid)
stat = stat[cid]
if len(trigs.snr) < num_events:
num_events = len(trigs.snr)

logging.info("Finding the loudest triggers")
loudest_idx = sorted(numpy.argsort(stat)[::-1][:num_events])
trigs.apply_mask(loudest_idx)
stat = stat[loudest_idx]
extra_kwargs = stat.parse_statistic_keywords_opt(args.statistic_keywords)

trigs.mask_to_n_loudest_clustered_events(
rank_method,
n_loudest=num_events,
cluster_window=args.cluster_window,
statistic_kwargs=extra_kwargs,
)

times = trigs.end_time
tids = trigs.template_id

if isinstance(trigs.mask, numpy.ndarray) and trigs.mask.dtype == bool:
trigger_ids = numpy.flatnonzero(trigs.mask)
else:
trigger_ids = trigs.mask

trig_stat = trigs.stat
# loop over number of loudest events to be followed up
order = stat.argsort()[::-1]
order = trig_stat.argsort()[::-1]
for rank, num_event in enumerate(order):
logging.info('Processing event: %s', num_event)
logging.info('Processing event: %s', rank)

files = wf.FileList([])
time = times[num_event]
ifo_time = '%s:%s' %(args.instrument, str(time))
if isinstance(trigs.mask, numpy.ndarray) and trigs.mask.dtype == bool:
tid = numpy.flatnonzero(trigs.mask)[num_event]
else:
tid = trigs.mask[num_event]
tid = trigger_ids[num_event]
ifo_tid = '%s:%s' %(args.instrument, str(tid))

layouts += (mini.make_sngl_ifo(workflow, sngl_file, tmpltbank_file,
Expand Down
25 changes: 16 additions & 9 deletions pycbc/io/hdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,7 @@ def trig_dict(self):
mtrigs[k] = self.trigs[k][self.mask]
else:
mtrigs[k] = self.trigs[k][:]
mtrigs['ifo'] = self.ifo
return mtrigs

@classmethod
Expand Down Expand Up @@ -687,30 +688,35 @@ def and_masks(self, logic_mask):
self.mask[and_indices.astype(np.uint64)] = True

def mask_to_n_loudest_clustered_events(self, rank_method,
ranking_threshold=6,
statistic_threshold=None,
n_loudest=10,
cluster_window=10):
cluster_window=10,
statistic_kwargs=None):
"""Edits the mask property of the class to point to the N loudest
single detector events as ranked by ranking statistic.
Events are clustered so that no more than 1 event within +/-
cluster_window will be considered. Can apply a threshold on the
ranking using ranking_threshold
statistic using statistic_threshold
"""

if statistic_kwargs is None:
statistic_kwargs = {}
sds = rank_method.single(self.trig_dict())
stat = rank_method.rank_stat_single((self.ifo, sds))
stat = rank_method.rank_stat_single(
(self.ifo, sds),
**statistic_kwargs
)
if len(stat) == 0:
# No triggers at all, so just return here
self.apply_mask(np.array([], dtype=np.uint64))
self.stat = np.array([], dtype=np.uint64)
return

times = self.end_time
if ranking_threshold:
# Threshold on sngl_ranking
# Note that we can provide None or zero to do no thresholding
# but the default is to do some
keep = stat >= ranking_threshold
if statistic_threshold is not None:
# Threshold on statistic
keep = stat >= statistic_threshold
stat = stat[keep]
times = times[keep]
self.apply_mask(keep)
Expand Down Expand Up @@ -744,6 +750,7 @@ def mask_to_n_loudest_clustered_events(self, rank_method,
index.sort()
# Apply to the existing mask
self.apply_mask(index)
self.stat = stat[index]

@property
def mask_size(self):
Expand Down

0 comments on commit e4a9db6

Please sign in to comment.