Some changes to HFile.select() and SingleDetTriggers (gwastro#4545)
* Some changes to HFile.select() and mask_to_n_loudest_clustered

* missed comma

* Some CC problems

* Update

* revert changes which meant kwargs couldn't be passed to mask_to_n_loudest_clustered_events

* CC / fixes

* Use list rather than numpy array for mask

* remove messing about with the mask dtyping

* initialise a transparent mask if it is not given

* Fix problem when the filter value may not be available to the HFile

* thinko in pycbc_bin_trigger_rates_dq

* Don't make a huge boolean mask of all True values and then run nonzero on it = BAD IDEA

* CC

* re-enable option to have no mask

* efficiency saving in pycbc_plot_hist

* no mask option in trig_dict

* typo

* allow None mask in mask_size

* thought I could apply another mask but I can't

* get filter_func back, use new interface for recent changes

* Some minor fixes

* cc

* allow option to do a logical and on masks

* various minor bugfixes

* max z problem

* fixes to pycbc_page_snglinfo

* use alternative dict for better clarity

* IWH comments

* add check for required datasets (mostly to remind people to add the required dataset list if they add a new ranking!)

* fix bad check, remove min_snr option

* remove option from CI

* Fix bug with applying premask and using filter_rank threshold at the same time

* stop checking for required datasets - this is difficult when so many different types use the same function

* CC prefers np.flatnonzero(arr) to arr.nonzero()[0] (see the sketch after this list)

* Fix a couple of rarely-used scripts, remove unused imports, whitespace
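
Two of the items above ("Don't make a huge boolean mask..." and "CC prefers np.flatnonzero...") concern the same index-handling clean-up. A minimal numpy sketch of both points (illustrative values, not code from the commit):

```python
import numpy as np

snr = np.array([5.0, 9.2, 6.1, 11.4])

# Preferred form: flatnonzero returns the flat indices directly.
idx = np.flatnonzero(snr > 6.0)      # array([1, 2, 3])
# Equivalent but clumsier: nonzero() returns a tuple, hence the [0].
idx_also = (snr > 6.0).nonzero()[0]  # same result

# The anti-pattern called out above: when no cut is applied, building an
# all-True mask and running nonzero on it materialises every index for
# nothing; passing no mask at all avoids the allocation entirely.
mask = np.ones(snr.size, dtype=bool)
wasteful = np.flatnonzero(mask)      # == np.arange(snr.size), pointlessly
```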
GarethCabournDavies authored and acorreia61201 committed Apr 4, 2024
1 parent 64d7f08 commit be1a69c
Showing 15 changed files with 468 additions and 291 deletions.
60 changes: 3 additions & 57 deletions bin/all_sky_search/pycbc_bin_trigger_rates_dq
@@ -15,7 +15,7 @@ from pycbc.events.veto import (select_segments_by_definer,
start_end_to_segments,
segments_to_start_end)
from pycbc.types.optparse import MultiDetOptionAction
from pycbc.io.hdf import HFile, SingleDetTriggers
from pycbc.io.hdf import SingleDetTriggers
from pycbc.version import git_verbose_msg as version

parser = argparse.ArgumentParser(description=__doc__)
@@ -46,79 +46,25 @@ logging.info('Start')

ifo, flag_name = args.flag_name.split(':')

# Setup a data mask to remove any triggers with SNR below threshold
# This works as a pre-filter as SNR is always greater than or equal
# to sngl_ranking, except in the psdvar case, where it could increase.

with HFile(args.trig_file, 'r') as trig_file:
n_triggers_orig = trig_file[f'{ifo}/snr'].size
logging.info("Trigger file has %d triggers", n_triggers_orig)
logging.info('Generating trigger mask')
if f'{ifo}/psd_var_val' in trig_file:
idx, _, _ = trig_file.select(
lambda snr, psdvar: snr / psdvar ** 0.5 >= args.stat_threshold,
f'{ifo}/snr',
f'{ifo}/psd_var_val',
return_indices=True
)
else:
# psd_var_val may not have been calculated
idx, _ = trig_file.select(
lambda snr: snr >= args.stat_threshold,
f'{ifo}/snr',
return_indices=True
)
data_mask = np.zeros(n_triggers_orig, dtype=bool)
data_mask[idx] = True

# also get the gated times while we have the file open
gate_times = []
if args.gating_windows:
logging.info('Getting gated times')
try:
gating_types = trig_file[f'{ifo}/gating'].keys()
for gt in gating_types:
gate_times += list(trig_file[f'{ifo}/gating/{gt}/time'][:])
gate_times = np.unique(gate_times)
except KeyError:
logging.warning('No gating found in trigger file')

logging.info("Getting %s triggers from file with pre-cut SNR > %.3f",
idx.size, args.stat_threshold)

trigs = SingleDetTriggers(
args.trig_file,
None,
None,
None,
None,
ifo,
premask=data_mask
filter_rank=args.sngl_ranking,
filter_threshold=args.stat_threshold,
)

# Extract the data we actually need from the data structure:
tmplt_ids = trigs.template_id
trig_times = trigs.end_time
stat = trigs.get_ranking(args.sngl_ranking)

n_triggers = tmplt_ids.size

logging.info("Applying %s > %.3f cut", args.sngl_ranking,
args.stat_threshold)
keep = stat >= args.stat_threshold
tmplt_ids = tmplt_ids[keep]
trig_times = trig_times[keep]
logging.info("Removed %d triggers, %d remain",
n_triggers - tmplt_ids.size, tmplt_ids.size)

# Get the template bins
bin_tids_dict = {}
with h5.File(args.template_bins_file, 'r') as f:
ifo_grp = f[ifo]
for bin_name in ifo_grp.keys():
bin_tids_dict[bin_name] = ifo_grp[bin_name]['tids'][:]


# get analysis segments
analysis_segs = select_segments_by_definer(
args.analysis_segment_file,
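
The hunk above is the template for several scripts in this commit: roughly fifty lines that opened the trigger file by hand, ran HFile.select() on SNR, and built a boolean premask are collapsed into two keyword arguments on the SingleDetTriggers constructor. A hedged before/after sketch, using only the keywords visible in this diff (the file path and the 'newsnr' ranking name are illustrative assumptions):

```python
from pycbc.io.hdf import SingleDetTriggers

trig_file = "H1-TRIGGER_MERGE.hdf"  # hypothetical single-detector trigger file
ifo = "H1"

# Old call: six positional arguments, with the caller building data_mask itself.
# trigs = SingleDetTriggers(trig_file, None, None, None, None, ifo,
#                           premask=data_mask)

# New call: the ranking cut is applied inside the class.
trigs = SingleDetTriggers(
    trig_file,
    ifo,
    filter_rank="newsnr",   # name of the sngl ranking to pre-filter on
    filter_threshold=6.0,   # keep triggers with ranking >= this value
)
stat = trigs.get_ranking("newsnr")
```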
35 changes: 3 additions & 32 deletions bin/all_sky_search/pycbc_fit_sngls_split_binned
@@ -22,7 +22,7 @@ from matplotlib import pyplot as plt
import numpy as np

from pycbc import events, bin_utils, results
from pycbc.io import HFile, SingleDetTriggers
from pycbc.io import SingleDetTriggers
from pycbc.events import triggers as trigs
from pycbc.events import trigger_fits as trstats
from pycbc.events import stat as pystat
@@ -202,48 +202,19 @@ boundaries = trigf[args.ifo + '/template_boundaries'][:]

trigf.close()

# Setup a data mask to remove any triggers with SNR below threshold
# This works as a pre-filter as SNR is always greater than or equal
# to sngl_ranking, except in the psdvar case, where it could increase.
with HFile(args.trigger_file, 'r') as trig_file:
n_triggers_orig = trig_file[f'{args.ifo}/snr'].size
logging.info("Trigger file has %d triggers", n_triggers_orig)
logging.info('Generating trigger mask')
if f'{args.ifo}/psd_var_val' in trig_file:
idx, _, _ = trig_file.select(
lambda snr, psdvar: snr / psdvar ** 0.5 >= args.plot_lower_stat_limit,
f'{args.ifo}/snr',
f'{args.ifo}/psd_var_val',
return_indices=True
)
else:
# psd_var_val may not have been calculated
idx, _ = trig_file.select(
lambda snr: snr >= args.plot_lower_stat_limit,
f'{args.ifo}/snr',
return_indices=True
)
data_mask = np.zeros(n_triggers_orig, dtype=bool)
data_mask[idx] = True


logging.info('Calculating single stat values from trigger file')
trigs = SingleDetTriggers(
args.trigger_file,
None,
None,
None,
None,
args.ifo,
premask=data_mask
filter_rank=args.sngl_ranking,
filter_threshold=args.plot_lower_stat_limit,
)
# This is the direct pointer to the HDF file, used later on
trigf = trigs.trigs_f

stat = trigs.get_ranking(args.sngl_ranking)
time = trigs.end_time


logging.info('Processing template boundaries')
max_boundary_id = np.argmax(boundaries)
sorted_boundary_list = np.sort(boundaries)
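
The comment deleted from both scripts above carries the reasoning behind the pre-filter: every supported sngl_ranking is bounded above by SNR, except the psd-variation ranking, which divides SNR by sqrt(psd_var_val) and can therefore exceed it when the variation statistic is below one. That is why the removed code branched on whether psd_var_val was present. A small numeric illustration (invented values):

```python
snr = 8.0
psd_var_val = 0.64                   # variation statistic below one
threshold = 9.0

adjusted = snr / psd_var_val ** 0.5  # 10.0, larger than the raw SNR
# A plain SNR cut would wrongly drop this trigger; the psdvar branch keeps it.
print(snr >= threshold)       # False
print(adjusted >= threshold)  # True
```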
42 changes: 28 additions & 14 deletions bin/minifollowups/pycbc_injection_minifollowup
@@ -244,15 +244,17 @@ def nearby_missedinj(endtime, snr):
trigger_idx = {}
trigger_snrs = {}
trigger_times = {}
# This finds the triggers near to _any_ missed injection
for trig in single_triggers:
ifo = trig.ifo
with HFile(trig.lfn, 'r') as trig_f:
trigger_idx[ifo], trigger_times[ifo], trigger_snrs[ifo] = \
trigger_idx[ifo], data_tuple = \
trig_f.select(
nearby_missedinj,
f'{ifo}/end_time',
f'{ifo}/snr',
return_indices=True)
)
trigger_times[ifo], trigger_snrs[ifo] = data_tuple

# figure out how many injections to follow up
num_events = int(workflow.cp.get_opt_tags(
@@ -305,7 +307,7 @@ for num_event in range(num_events):
tags=args.tags + [str(num_event)])[0],)]

for sngl in single_triggers:
# Find the triggers close to this injection at this IFO
# Find the triggers close to _this_ injection at this IFO
ifo = sngl.ifo
trig_tdiff = abs(inj_params[ifo + '_end_time'] - trigger_times[ifo])
nearby = trig_tdiff < args.nearby_triggers_window
@@ -315,7 +317,7 @@ for num_event in range(num_events):
continue
# Find the loudest SNR in this window
loudest = numpy.argmax(trigger_snrs[ifo][nearby])
# Convert to the indexin the trigger file
# Convert to the index in the trigger file
nearby_trigger_idx = trigger_idx[ifo][nearby][loudest]
# Make the info snippet
sngl_info = mini.make_sngl_ifo(workflow, sngl, tmpltbank_file,
@@ -381,19 +383,31 @@ for num_event in range(num_events):
use_exact_inj_params=True)

for curr_ifo in args.single_detector_triggers:
# Finding loudest template in this detector near to the injection:
# First, find triggers close to the missed injection
single_fname = args.single_detector_triggers[curr_ifo]
hd_sngl = SingleDetTriggers(single_fname, args.bank_file, None, None,
None, curr_ifo)
end_times = hd_sngl.end_time
# Use SNR here or NewSNR??
snr = hd_sngl.snr
lgc_mask = abs(end_times - inj_params['tc']) < args.inj_window

if len(snr[lgc_mask]) == 0:
idx, _ = HFile(single_fname).select(
lambda t: abs(t - inj_params['tc']) < args.inj_window,
f'{curr_ifo}/end_time',
return_data=False,
)

if len(idx) == 0:
# No events in the injection window for this detector
continue

snr_idx = numpy.arange(len(lgc_mask))[lgc_mask][snr[lgc_mask].argmax()]
hd_sngl.mask = [snr_idx]
hd_sngl = SingleDetTriggers(
single_fname,
curr_ifo,
bank_file=args.bank_file,
premask=idx
)
# Next, find the loudest within this set of triggers
# Use SNR here or NewSNR, or other??
loudest_idx = hd_sngl.snr.argmax()
hd_sngl.apply_mask([loudest_idx])

# What are the parameters of this trigger?
curr_params = copy.deepcopy(inj_params)
curr_params['mass1'] = hd_sngl.mass1[0]
curr_params['mass2'] = hd_sngl.mass2[0]
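
This file shows the reworked HFile.select() interface most completely. As far as these hunks show: the old return_indices=True flag is gone, the method now returns a pair of (indices, tuple of filtered datasets), return_data=False skips reading the filtered columns back, and a group keyword prefixes the dataset names. A sketch under those assumptions (file name and values are hypothetical):

```python
from pycbc.io.hdf import HFile

fname = "H1-TRIGGER_MERGE.hdf"  # hypothetical trigger file
ifo = "H1"

with HFile(fname, "r") as f:
    # Indices plus the filtered data, one array per requested dataset.
    idx, (end_time, snr) = f.select(
        lambda t, s: s > 8.0,
        f"{ifo}/end_time",
        f"{ifo}/snr",
    )

    # Indices only: the filtered columns are not read back.
    idx, _ = f.select(
        lambda t: abs(t - 1187008882.4) < 10.0,
        "end_time",
        group=ifo,        # prefix applied to each dataset name
        return_data=False,
    )
```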
50 changes: 34 additions & 16 deletions bin/minifollowups/pycbc_page_snglinfo
@@ -21,7 +21,7 @@ matplotlib.use('Agg')
import lal
import pycbc.version, pycbc.events, pycbc.results, pycbc.pnutils
from pycbc.results import followup
from pycbc.events import stat
from pycbc.events import stat as pystat
from pycbc.io import hdf


@@ -62,37 +62,53 @@ parser.add_argument('--include-gracedb-link', action='store_true',
parser.add_argument('--significance-file',
help="If given, will search for this trigger's id in the file to see if "
"stat and p_astro values exists for this trigger.")
stat.insert_statistic_option_group(parser,
pystat.insert_statistic_option_group(parser,
default_ranking_statistic='single_ranking_only')


args = parser.parse_args()

pycbc.init_logging(args.verbose)

if args.trigger_id is not None:
# Make a mask which is just the trigger of interest
mask = numpy.array([args.trigger_id])
else:
mask = None

# Get the single-ifo triggers
sngl_file = hdf.SingleDetTriggers(args.single_trigger_file, args.bank_file,
args.veto_file, args.veto_segment_name, None, args.instrument)
sngl_file = hdf.SingleDetTriggers(
args.single_trigger_file,
args.instrument,
bank_file=args.bank_file,
veto_file=args.veto_file,
segment_name=args.veto_segment_name,
premask=mask,
)

rank_method = pystat.get_statistic_from_opts(args, [args.instrument])

rank_method = stat.get_statistic_from_opts(args, [args.instrument])
if args.trigger_id is not None:
sngl_file.mask = [args.trigger_id]
trig_id = args.trigger_id
sngl_file.mask_to_n_loudest_clustered_events(rank_method, n_loudest=1)
stat = sngl_file.stat[0]
# Mask already applied
pass
elif args.n_loudest is not None:
# Cluster by a ranking statistic and retain only the loudest n clustered
# triggers
sngl_file.mask_to_n_loudest_clustered_events(rank_method,
n_loudest=args.n_loudest+1)
sngl_file.mask_to_n_loudest_clustered_events(
rank_method,
n_loudest=args.n_loudest+1
)
else:
raise ValueError("Must give --n-loudest or --trigger-id.")

sds = rank_method.single(sngl_file.trig_dict())
stat = rank_method.rank_stat_single((sngl_file.ifo, sds))
if args.n_loudest is not None:
# Restrict to only the nth loudest, instead of all triggers up to nth
# loudest
stat = sngl_file.stat
l = stat.argsort()
stat = stat[l[0]]
sngl_file.mask = sngl_file.mask[l[0]]
else:
raise ValueError("Must give --n-loudest or --trigger-id.")
sngl_file.apply_mask(l[0])

# make a table for the single detector information ############################
time = sngl_file.end_time
@@ -142,7 +158,9 @@ if args.ranking_statistic in ["quadsum", "single_ranking_only"]:
else:
# Name would be too long - just call it ranking statistic
stat_name = 'Ranking Statistic'
stat_name_long = ' with '.join([args.ranking_statistic, args.sngl_ranking])
stat_name_long = ' with '.join(
[args.ranking_statistic, args.sngl_ranking]
)

headers.append(stat_name)

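
The reworked flow here computes the single-detector statistic once over the retained triggers, then narrows the object with apply_mask() instead of reassigning .mask directly. Schematically (following the calls in the new code above; assumes sngl_file and rank_method are constructed as in the script):

```python
# Per-trigger inputs for the statistic, from the masked trigger set.
sds = rank_method.single(sngl_file.trig_dict())
# Ranking statistic values for those triggers.
stat = rank_method.rank_stat_single((sngl_file.ifo, sds))

order = stat.argsort()          # ascending, so order[0] is the quietest
sngl_file.apply_mask(order[0])  # keep only the requested n-th loudest event
```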
15 changes: 7 additions & 8 deletions bin/minifollowups/pycbc_plot_trigger_timeseries
@@ -61,9 +61,13 @@ for ifo in args.single_trigger_files.keys():

# Identify trigger idxs within window of trigger time
with HFile(args.single_trigger_files[ifo], 'r') as data:
idx, _ = data.select(lambda endtime: abs(endtime - t) < args.window,
f'{ifo}/end_time', return_indices=True)
data_mask = numpy.zeros(len(data[f'{ifo}/snr']), dtype=bool)
idx, _ = data.select(
lambda endtime: abs(endtime - t) < args.window,
'end_time',
group=ifo,
return_data=False,
)
data_mask = numpy.zeros(data[ifo]['snr'].size, dtype=bool)
data_mask[idx] = True

if not len(idx):
@@ -75,13 +79,8 @@ for ifo in args.single_trigger_files.keys():
label=ifo)
continue

# Not using bank file, or veto file, so lots of "None"s here.
trigs = SingleDetTriggers(
args.single_trigger_files[ifo],
None,
None,
None,
None,
ifo,
premask=data_mask
)
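
One detail worth noting across these files: pycbc_injection_minifollowup above passes the index array from select() straight in as premask, while this script expands it into a full-length boolean array. As far as the diff shows, the constructor accepts either form; the two carry the same information, as sketched below (illustrative values):

```python
import numpy as np

n_triggers = 1_000_000          # total triggers in the file (illustrative)
idx = np.array([7, 41, 95000])  # indices as returned by HFile.select()

# Form 1: indices passed directly (pycbc_injection_minifollowup).
premask_indices = idx

# Form 2: full-length boolean mask (this script).
premask_bool = np.zeros(n_triggers, dtype=bool)
premask_bool[idx] = True

assert np.array_equal(np.flatnonzero(premask_bool), premask_indices)
```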
32 changes: 8 additions & 24 deletions bin/minifollowups/pycbc_sngl_minifollowup
@@ -65,8 +65,9 @@ parser.add_argument('--inspiral-data-read-name',
parser.add_argument('--inspiral-data-analyzed-name',
help="Name of inspiral segmentlist containing data "
"analyzed by each analysis job.")
parser.add_argument('--min-snr', type=float, default=6.5,
help="Minimum SNR to consider for loudest triggers")
parser.add_argument('--min-sngl-ranking', type=float, default=6.5,
help="Minimum sngl-ranking to consider for loudest "
"triggers. Default=6.5.")
parser.add_argument('--non-coinc-time-only', action='store_true',
help="If given remove (veto) single-detector triggers "
"that occur during a time when at least one other "
@@ -130,31 +131,14 @@ insp_data_seglists.coalesce()
num_events = int(workflow.cp.get_opt_tags('workflow-sngl_minifollowups',
'num-sngl-events', ''))

# This helps speed up the processing to ignore a large fraction of triggers
mask = None
f = hdf.HFile(args.single_detector_file, 'r')
n_triggers = f['{}/snr'.format(args.instrument)].size
logging.info("%i triggers in file", n_triggers)
if args.min_snr:
logging.info('Calculating Prefilter')
idx, _ = f.select(lambda snr: snr > args.min_snr,
'{}/snr'.format(args.instrument),
return_indices=True)
mask = numpy.zeros(n_triggers, dtype=bool)
mask[idx] = True
if len(idx) < num_events:
logging.info("Fewer triggers exist after the --min-snr cut (%d) "
"than requested for the minifollowup (%d)",
len(idx), num_events)

trigs = hdf.SingleDetTriggers(
args.single_detector_file,
args.bank_file,
args.foreground_censor_file,
args.foreground_segment_name,
None,
args.instrument,
premask=mask
bank_file=args.bank_file,
veto_file=args.foreground_censor_file,
segment_name=args.foreground_segment_name,
filter_rank=args.sngl_ranking,
filter_threshold=args.min_sngl_ranking
)

# Include gating vetoes
(The remaining nine changed files are not rendered on this page.)
