Add dq fitting to Live supervised fitting script (gwastro#4866)
* Add 'dq_state' to list of 'region_ref_datasets' in trigger collation script

* Added method skeletons for dq trigger rate calculations to supervision script

* Finish first draft of supervision script

* First draft of daily dq trigger rates script

* Modify handling of template bins to match offline

* Split MDC original date calculation into a separate function

* Add pycbc_live_combine_dq_trigger_rates script

* Make pycbc_plot_dq_flag_likelihood work with low-latency dq

* Accept analysis flag name and frame type as arguments to daily dq job

* Make plotting work, and other small fixes

* Format variable dq file name

* Added option to use pregenerated trigger file

* for real this time

* Gareth comments

* Renamed daily -> collated

* daily -> collated inside supervision script

* Use deterministic hashing

* Try to fix h5py error

* Use h5py File instead of PyCBC HFile for outputs

* HFile -> h5py for outputs

* One more Gareth comment
maxtrevor authored and prayush committed Nov 21, 2024
1 parent 172c4c0 commit 3b694bc
Showing 6 changed files with 598 additions and 115 deletions.
3 changes: 2 additions & 1 deletion bin/all_sky_search/pycbc_bin_templates
@@ -48,7 +48,8 @@ with h5.File(args.output_file, 'w') as f:
         bin_tids = bin_dict[bin_name]
         grp = ifo_grp.create_group(bin_name)
         grp['tids'] = bin_tids
+    f.attrs['bank_file'] = args.bank_file
     f.attrs['f_lower'] = args.f_lower
-    f.attrs['background_bins'] = args.background_bins
+    f.attrs['background_bins'] = ' '.join(args.background_bins)
 
 logging.info('Finished')
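
The second change matters because args.background_bins is a list of strings; joining it into one space-separated string gives a single attribute that downstream scripts can split and hash reproducibly. A minimal sketch of the round trip, with a hypothetical bin specification:

import h5py

bins = ['bns:chirp:lt1.74', 'bbh:total:gt20']  # hypothetical bin spec
with h5py.File('bins.hdf', 'w') as f:
    f.attrs['background_bins'] = ' '.join(bins)
with h5py.File('bins.hdf', 'r') as f:
    assert f.attrs['background_bins'].split() == bins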
3 changes: 2 additions & 1 deletion bin/live/pycbc_live_collate_triggers
@@ -231,14 +231,15 @@ with h5py.File(args.output_file, 'w') as destination:
             sorted_key = triggers[key][:][sorted_indices]
             triggers[key][:] = sorted_key
 
-
     logging.info("Setting up region references")
     # Datasets which need region references:
     region_ref_datasets = ('chisq_dof', 'chisq', 'coa_phase',
                            'end_time', 'sg_chisq', 'snr',
                            'template_duration', 'sigmasq')
     if 'psd_var_val' in triggers.keys():
         region_ref_datasets += ('psd_var_val',)
+    if 'dq_state' in triggers.keys():
+        region_ref_datasets += ('dq_state',)
 
     start_boundaries = template_boundaries
     end_boundaries = numpy.roll(start_boundaries, -1)
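
The new 'dq_state' entry gets per-template region references alongside the other trigger datasets; pycbc_live_collated_dq_trigger_rates (below) dereferences them. A minimal sketch of the h5py region-reference pattern, with hypothetical dataset names and sizes:

import h5py
import numpy as np

with h5py.File('triggers.hdf', 'w') as f:
    # triggers from two templates stored contiguously: 60 then 40 of them
    dq_state = f.create_dataset('H1/dq_state', data=np.zeros(100, dtype=int))
    refs = f.create_dataset('H1/dq_state_template', (2,),
                            dtype=h5py.regionref_dtype)
    refs[0] = dq_state.regionref[0:60]    # region for template 0
    refs[1] = dq_state.regionref[60:100]  # region for template 1

with h5py.File('triggers.hdf', 'r') as f:
    region = f['H1/dq_state_template'][1]  # a region reference
    states = f['H1/dq_state'][region]      # just template 1's triggers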
157 changes: 157 additions & 0 deletions bin/live/pycbc_live_collated_dq_trigger_rates
@@ -0,0 +1,157 @@
#!/usr/bin/python

# Copyright 2024 Max Trevor
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.

"""
Calculate the noise trigger rate adjustment as a function of data-quality state
for a day of PyCBC Live triggers.
"""

import logging
import argparse
import hashlib

import numpy as np
import h5py as h5

import pycbc
from pycbc.io import HFile

import gwdatafind

from gwpy.timeseries import TimeSeriesDict
from gwpy.segments import Segment, DataQualityFlag

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--trigger-file", required=True)
parser.add_argument("--ifo", required=True)
parser.add_argument("--gps-start-time", required=True, type=int)
parser.add_argument("--gps-end-time", required=True, type=int)
parser.add_argument("--template-bin-file", required=True)
parser.add_argument("--analysis-frame-type", required=True)
parser.add_argument("--analysis-flag-name", required=True)
parser.add_argument("--dq-channel", required=True)
parser.add_argument("--dq-ok-channel", required=True)
parser.add_argument("--dq-thresh", required=True, type=float)
parser.add_argument("--output", required=True)
pycbc.add_common_pycbc_options(parser)
args = parser.parse_args()

pycbc.init_logging(args.verbose)

# Get observing segs
ar_flag_name = args.analysis_flag_name.format(ifo=args.ifo)
day_seg = Segment(args.gps_start_time, args.gps_end_time)
observing_flag = DataQualityFlag.query(ar_flag_name, day_seg)
observing_segs = observing_flag.active
livetime = observing_flag.livetime
logging.info(f'Found {livetime} seconds of observing time at {args.ifo}.')

# for each segment, check how much time was dq flagged
flagged_time = 0
frame_type = args.analysis_frame_type.format(ifo=args.ifo)
dq_channel = args.dq_channel.format(ifo=args.ifo)
dq_ok_channel = args.dq_ok_channel.format(ifo=args.ifo)
for seg in observing_segs:
    frames = gwdatafind.find_urls(
        args.ifo[0],
        frame_type,
        seg[0],
        seg[1],
    )
    tsdict = TimeSeriesDict.read(
        frames,
        channels=[dq_channel, dq_ok_channel],
        start=seg[0],
        end=seg[1],
    )
    dq_ok_flag = (tsdict[dq_ok_channel] == 1).to_dqflag()
    dq_flag = (tsdict[dq_channel] <= args.dq_thresh).to_dqflag()
    flagged_time += (dq_flag & dq_ok_flag).livetime
logging.info(f'Found {flagged_time} seconds of dq flagged time at {args.ifo}.')

bg_livetime = livetime - flagged_time
state_time = np.array([bg_livetime, flagged_time])
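# note: index 0 = flag inactive, index 1 = flag active; this matches the
# 0/1 values stored per trigger in the dq_state dataset read below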

# read in template bins
logging.info(f'Reading template bins from {args.template_bin_file}')
template_bins = {}
num_templates = 0
with HFile(args.template_bin_file, 'r') as bin_file:
    f_lower = bin_file.attrs['f_lower']
    bank_file = bin_file.attrs['bank_file']
    bin_string = bin_file.attrs['background_bins']

    # only ever one group in this file
    grp = bin_file[list(bin_file.keys())[0]]
    num_bins = len(grp.keys())
    for k in grp.keys():
        template_bins[k] = grp[k]['tids'][:]
        num_templates += len(template_bins[k])

# for each bin, get total number of triggers and number of dq flagged triggers
bin_total_triggers = np.zeros(num_bins)
bin_dq_triggers = np.zeros(num_bins)

logging.info(f'Reading triggers from {args.trigger_file}')
with HFile(args.trigger_file, 'r') as trigf:
    for bin_name in template_bins.keys():
        # bins are named as 'bin{bin_num}'
        bin_num = int(bin_name[3:])
        template_ids = template_bins[bin_name]
        for template_id in template_ids:
            # get dq states for all triggers with this template
            # dq state is either 0 or 1 for each trigger
            dq_key = trigf[f'{args.ifo}/dq_state_template'][template_id]
            dq_states = trigf[f'{args.ifo}/dq_state'][dq_key]

            # update trigger counts
            bin_total_triggers[bin_num] += len(dq_states)
            bin_dq_triggers[bin_num] += np.sum(dq_states)

# write outputs to file
logging.info(f'Writing results to {args.output}')
with h5.File(args.output, 'w') as f:
    ifo_group = f.create_group(args.ifo)
    ifo_group.create_dataset('observing_livetime', data=livetime)
    ifo_group.create_dataset('dq_flag_livetime', data=flagged_time)
    bin_group = ifo_group.create_group('bins')
    for bin_name in template_bins.keys():
        bin_num = int(bin_name[3:])
        bgrp = bin_group.create_group(bin_name)
        bgrp.create_dataset('tids', data=template_bins[bin_name])
        bgrp.create_dataset('total_triggers', data=bin_total_triggers[bin_num])
        bgrp.create_dataset('dq_triggers', data=bin_dq_triggers[bin_num])

        bg_triggers = bin_total_triggers[bin_num] - bin_dq_triggers[bin_num]
        num_trigs = np.array([bg_triggers, bin_dq_triggers[bin_num]])
        trig_rates = num_trigs / state_time
        mean_rate = bin_total_triggers[bin_num] / livetime
        normalized_rates = trig_rates / mean_rate
        bgrp.create_dataset('dq_rates', data=normalized_rates)

    f.attrs['dq_thresh'] = args.dq_thresh
    f.attrs['dq_channel'] = dq_channel
    f.attrs['dq_ok_channel'] = dq_ok_channel
    f.attrs['gps_start_time'] = args.gps_start_time
    f.attrs['gps_end_time'] = args.gps_end_time
    f.attrs['f_lower'] = f_lower
    f.attrs['bank_file'] = bank_file
    f.attrs['background_bins'] = bin_string

    # hash is used to check if different files have compatible settings
    settings_to_hash = [args.dq_thresh, dq_channel, dq_ok_channel,
                        f_lower, bank_file, bin_string]
    setting_str = ' '.join([str(s) for s in settings_to_hash])
    hash_object = hashlib.sha256(setting_str.encode())
    f.attrs['settings_hash'] = hash_object.hexdigest()
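
The sha256 digest here is what the "Use deterministic hashing" commit refers to: Python's built-in hash() of a string is salted per process, so separate daily jobs could never agree on a value, while a sha256 of the encoded settings string is stable everywhere. A minimal sketch with hypothetical settings values:

import hashlib

# hypothetical settings; the real list is built from the attrs written above
settings = [15.0, 'H1:DQ_CHANNEL', 'H1:DQ_OK_CHANNEL', 20.0, 'bank.hdf']
setting_str = ' '.join(str(s) for s in settings)
digest = hashlib.sha256(setting_str.encode()).hexdigest()
# any job run with the same settings reproduces the same digest, which is
# how pycbc_live_combine_dq_trigger_rates decides which files to combine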
104 changes: 104 additions & 0 deletions bin/live/pycbc_live_combine_dq_trigger_rates
@@ -0,0 +1,104 @@
#!/usr/bin/python

# Copyright 2024 Max Trevor
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.

"""Combine the data-quality adjusted trigger rates from multiple days."""

import logging
import argparse

import numpy as np
import h5py as h5

import pycbc
from pycbc.io import HFile

parser = argparse.ArgumentParser(description=__doc__)
pycbc.add_common_pycbc_options(parser)
parser.add_argument("--daily-dq-files", nargs="+", required=True,
help="Files containing daily dq trigger rates")
parser.add_argument("--ifo", required=True)
parser.add_argument("--output", required=True)
args = parser.parse_args()

pycbc.init_logging(args.verbose)

daily_files = args.daily_dq_files
daily_files.sort()

# need all files to use compatible settings
# get settings hash from the last file
# we will only use files that have the same hash
with HFile(daily_files[-1], 'r') as last_file:
    settings_hash = last_file.attrs['settings_hash']
    bin_str = last_file.attrs['background_bins']
    bank_file = last_file.attrs['bank_file']
    f_lower = last_file.attrs['f_lower']
    dq_thresh = last_file.attrs['dq_thresh']
    dq_channel = last_file.attrs['dq_channel']
    dq_ok_channel = last_file.attrs['dq_ok_channel']
    bin_group = last_file[f'{args.ifo}/bins']
    template_bins = {bin_name: bin_group[bin_name]['tids'][:]
                     for bin_name in bin_group.keys()}
num_bins = len(bin_str.split())

total_livetime = 0
flagged_livetime = 0
total_triggers = np.zeros(num_bins)
flagged_triggers = np.zeros(num_bins)
for fpath in daily_files:
    with HFile(fpath, 'r') as f:
        if f.attrs['settings_hash'] != settings_hash:
            warning_str = f'File {fpath} has incompatible settings, skipping'
            logging.warning(warning_str)
            continue
        total_livetime += f[f'{args.ifo}/observing_livetime'][()]
        flagged_livetime += f[f'{args.ifo}/dq_flag_livetime'][()]
        bin_group = f[f'{args.ifo}/bins']
        for bin_name in bin_group.keys():
            # bins are named as 'bin{bin_num}'
            bin_num = int(bin_name[3:])
            bgrp = bin_group[bin_name]
            total_triggers[bin_num] += bgrp['total_triggers'][()]
            flagged_triggers[bin_num] += bgrp['dq_triggers'][()]

total_trigger_rate = total_triggers / total_livetime
flag_trigger_rate = flagged_triggers / flagged_livetime
bg_triggers = total_triggers - flagged_triggers
bg_livetime = total_livetime - flagged_livetime
bg_trigger_rate = bg_triggers / bg_livetime

# save results
with h5.File(args.output, 'w') as f:
    ifo_grp = f.create_group(args.ifo)
    all_bin_grp = ifo_grp.create_group('bins')

    for bin_name, bin_tids in template_bins.items():
        bin_grp = all_bin_grp.create_group(bin_name)
        bin_grp['tids'] = bin_tids
        bin_num = int(bin_name[3:])
        bin_trig_rates = [bg_trigger_rate[bin_num], flag_trigger_rate[bin_num]]
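        # dividing this list by the NumPy scalar below broadcasts it to a
        # NumPy array, so the division is element-wise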
        bin_trig_rates /= total_trigger_rate[bin_num]
        bin_grp['dq_rates'] = bin_trig_rates
        bin_grp['num_triggers'] = total_triggers[bin_num]

    f.attrs['settings_hash'] = settings_hash
    f.attrs['stat'] = f'{args.ifo}-dq_stat_info'
    f.attrs['total_livetime'] = total_livetime
    f.attrs['flagged_livetime'] = flagged_livetime
    f.attrs['dq_thresh'] = dq_thresh
    f.attrs['dq_channel'] = dq_channel
    f.attrs['dq_ok_channel'] = dq_ok_channel
    f.attrs['background_bins'] = bin_str
    f.attrs['bank_file'] = bank_file
    f.attrs['f_lower'] = f_lower
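
To make the dq_rates normalization concrete, a sketch with invented numbers for a single template bin (illustration only, not real data):

import numpy as np

total_livetime = 172800.0    # two days of observing time (s), invented
flagged_livetime = 1728.0    # 1% of that time had the dq flag active
total_triggers = 20000.0     # triggers in this template bin
flagged_triggers = 2000.0    # of which these arrived during flagged time

bg_rate = (total_triggers - flagged_triggers) / (total_livetime - flagged_livetime)
flag_rate = flagged_triggers / flagged_livetime
mean_rate = total_triggers / total_livetime

dq_rates = np.array([bg_rate, flag_rate]) / mean_rate
print(dq_rates)  # ~[0.91, 10.0]: triggers ~10x more likely while flagged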