Add dq fitting to Live supervised fitting script (gwastro#4866)
* Add 'dq_state' to list of 'region_ref_datasets' in trigger collation script
* Added method skeletons for dq trigger rate calculations to supervision script
* Finish first draft of supervision script
* First draft of daily dq trigger rates script
* Modify handling of template bins to match offline
* Split MDC original date calculation into a separate function
* Add pycbc_live_combine_dq_trigger_rates script
* Make pycbc_plot_dq_flag_likelihood work with low-latency dq
* Accept analysis flag name and frame type as arguments to daily dq job
* Make plotting work, and other small fixes
* Format variable dq file name
* Added option to use pregenerated trigger file
* for real this time
* Gareth comments
* Renamed daily -> collated
* daily -> collated inside supervision script
* Use deterministic hashing
* Try to fix h5py error
* Use h5py File instead of PyCBC HFile for outputs
* HFile -> h5py for outputs
* One more Gareth comment
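The commit message refers to a supervision script that launches the daily dq job. As a rough sketch of that hand-off (the executable name, file paths, channel names, frame type, and threshold below are illustrative assumptions, not taken from this diff; the option names match the argparse definitions in the first new script), a supervisor might build the command like this:

    # Hypothetical sketch: how a supervising process might invoke the
    # daily dq trigger-rate job. Executable name and all values are
    # illustrative assumptions; only the option names come from the
    # argparse setup in the script added below.
    import subprocess

    day_start = 1400000000  # assumed GPS start of the day being fit
    day_end = day_start + 86400

    cmd = [
        'pycbc_live_collated_dq_trigger_rates',  # assumed executable name
        '--trigger-file', 'H1-Live-triggers.hdf',  # assumed path
        '--ifo', 'H1',
        '--gps-start-time', str(day_start),
        '--gps-end-time', str(day_end),
        '--template-bin-file', 'template_bins.hdf',  # assumed path
        '--analysis-frame-type', '{ifo}_HOFT_C00',  # assumed frame type
        '--analysis-flag-name', '{ifo}:DMT-ANALYSIS_READY:1',  # assumed flag
        '--dq-channel', '{ifo}:IDQ-FAP',  # assumed channel template
        '--dq-ok-channel', '{ifo}:IDQ-OK',  # assumed channel template
        '--dq-thresh', '0.01',  # assumed threshold
        '--output', f'H1-dq_trigger_rates-{day_start}.hdf',
    ]
    subprocess.run(cmd, check=True)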
Showing 6 changed files with 598 additions and 115 deletions.
@@ -0,0 +1,157 @@
#!/usr/bin/python

# Copyright 2024 Max Trevor
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.

"""
Calculate the noise trigger rate adjustment as a function of data-quality state
for a day of PyCBC Live triggers.
"""

import logging
import argparse
import hashlib

import numpy as np
import h5py as h5

import pycbc
from pycbc.io import HFile

import gwdatafind

from gwpy.timeseries import TimeSeriesDict
from gwpy.segments import Segment, DataQualityFlag

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--trigger-file", required=True)
parser.add_argument("--ifo", required=True)
parser.add_argument("--gps-start-time", required=True, type=int)
parser.add_argument("--gps-end-time", required=True, type=int)
parser.add_argument("--template-bin-file", required=True)
parser.add_argument("--analysis-frame-type", required=True)
parser.add_argument("--analysis-flag-name", required=True)
parser.add_argument("--dq-channel", required=True)
parser.add_argument("--dq-ok-channel", required=True)
parser.add_argument("--dq-thresh", required=True, type=float)
parser.add_argument("--output", required=True)
pycbc.add_common_pycbc_options(parser)
args = parser.parse_args()

pycbc.init_logging(args.verbose)

# Get observing segs
ar_flag_name = args.analysis_flag_name.format(ifo=args.ifo)
day_seg = Segment(args.gps_start_time, args.gps_end_time)
observing_flag = DataQualityFlag.query(ar_flag_name, day_seg)
observing_segs = observing_flag.active
livetime = observing_flag.livetime
logging.info(f'Found {livetime} seconds of observing time at {args.ifo}.')

# for each segment, check how much time was dq flagged
flagged_time = 0
frame_type = args.analysis_frame_type.format(ifo=args.ifo)
dq_channel = args.dq_channel.format(ifo=args.ifo)
dq_ok_channel = args.dq_ok_channel.format(ifo=args.ifo)
for seg in observing_segs:
    frames = gwdatafind.find_urls(
        args.ifo[0],
        frame_type,
        seg[0],
        seg[1],
    )
    tsdict = TimeSeriesDict.read(
        frames,
        channels=[dq_channel, dq_ok_channel],
        start=seg[0],
        end=seg[1],
    )
    # count time as flagged when the dq-ok channel says the dq data is
    # usable and the dq channel is at or below the threshold
    dq_ok_flag = (tsdict[dq_ok_channel] == 1).to_dqflag()
    dq_flag = (tsdict[dq_channel] <= args.dq_thresh).to_dqflag()
    flagged_time += (dq_flag & dq_ok_flag).livetime
logging.info(f'Found {flagged_time} seconds of dq flagged time at {args.ifo}.')

# livetime in each dq state: index 0 = flag off (background), 1 = flag on
bg_livetime = livetime - flagged_time
state_time = np.array([bg_livetime, flagged_time])

# read in template bins
logging.info(f'Reading template bins from {args.template_bin_file}')
template_bins = {}
num_templates = 0
with HFile(args.template_bin_file, 'r') as bin_file:
    f_lower = bin_file.attrs['f_lower']
    bank_file = bin_file.attrs['bank_file']
    bin_string = bin_file.attrs['background_bins']

    # only ever one group in this file
    grp = bin_file[list(bin_file.keys())[0]]
    num_bins = len(grp.keys())
    for k in grp.keys():
        template_bins[k] = grp[k]['tids'][:]
        num_templates += len(template_bins[k])

# for each bin, get total number of triggers and number of dq flagged triggers
bin_total_triggers = np.zeros(num_bins)
bin_dq_triggers = np.zeros(num_bins)

logging.info(f'Reading triggers from {args.trigger_file}')
with HFile(args.trigger_file, 'r') as trigf:
    for bin_name in template_bins.keys():
        # bins are named as 'bin{bin_num}'
        bin_num = int(bin_name[3:])
        template_ids = template_bins[bin_name]
        for template_id in template_ids:
            # get dq states for all triggers with this template
            # dq state is either 0 or 1 for each trigger
            dq_key = trigf[f'{args.ifo}/dq_state_template'][template_id]
            dq_states = trigf[f'{args.ifo}/dq_state'][dq_key]

            # update trigger counts
            bin_total_triggers[bin_num] += len(dq_states)
            bin_dq_triggers[bin_num] += np.sum(dq_states)

# write outputs to file
logging.info(f'Writing results to {args.output}')
with h5.File(args.output, 'w') as f:
    ifo_group = f.create_group(args.ifo)
    ifo_group.create_dataset('observing_livetime', data=livetime)
    ifo_group.create_dataset('dq_flag_livetime', data=flagged_time)
    bin_group = ifo_group.create_group('bins')
    for bin_name in template_bins.keys():
        bin_num = int(bin_name[3:])
        bgrp = bin_group.create_group(bin_name)
        bgrp.create_dataset('tids', data=template_bins[bin_name])
        bgrp.create_dataset('total_triggers', data=bin_total_triggers[bin_num])
        bgrp.create_dataset('dq_triggers', data=bin_dq_triggers[bin_num])

        # normalized rate in each dq state: (triggers in state / livetime
        # in state), divided by the bin's mean rate over all observing time
        bg_triggers = bin_total_triggers[bin_num] - bin_dq_triggers[bin_num]
        num_trigs = np.array([bg_triggers, bin_dq_triggers[bin_num]])
        trig_rates = num_trigs / state_time
        mean_rate = bin_total_triggers[bin_num] / livetime
        normalized_rates = trig_rates / mean_rate
        bgrp.create_dataset('dq_rates', data=normalized_rates)

    f.attrs['dq_thresh'] = args.dq_thresh
    f.attrs['dq_channel'] = dq_channel
    f.attrs['dq_ok_channel'] = dq_ok_channel
    f.attrs['gps_start_time'] = args.gps_start_time
    f.attrs['gps_end_time'] = args.gps_end_time
    f.attrs['f_lower'] = f_lower
    f.attrs['bank_file'] = bank_file
    f.attrs['background_bins'] = bin_string

    # hash is used to check if different files have compatible settings
    settings_to_hash = [args.dq_thresh, dq_channel, dq_ok_channel,
                        f_lower, bank_file, bin_string]
    setting_str = ' '.join([str(s) for s in settings_to_hash])
    hash_object = hashlib.sha256(setting_str.encode())
    f.attrs['settings_hash'] = hash_object.hexdigest()
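To make the hash check concrete, here is a minimal reader sketch. The file name and detector are illustrative assumptions; the dataset layout and hash recipe match what the script above writes, and the recomputed hash should agree with the stored one provided the attributes round-trip through HDF5 with the same string forms:

    # Minimal sketch: read back a daily output file and verify its
    # settings hash. 'H1-dq_trigger_rates.hdf' and 'H1' are assumptions.
    import hashlib
    import h5py as h5

    ifo = 'H1'  # assumed detector
    with h5.File('H1-dq_trigger_rates.hdf', 'r') as f:  # assumed file name
        # recompute the hash from the stored settings, in the same order
        settings = [f.attrs['dq_thresh'], f.attrs['dq_channel'],
                    f.attrs['dq_ok_channel'], f.attrs['f_lower'],
                    f.attrs['bank_file'], f.attrs['background_bins']]
        setting_str = ' '.join(str(s) for s in settings)
        rehash = hashlib.sha256(setting_str.encode()).hexdigest()
        assert rehash == f.attrs['settings_hash']

        # normalized rates per bin: index 0 = flag off, index 1 = flag on
        for bin_name in f[f'{ifo}/bins']:
            rates = f[f'{ifo}/bins/{bin_name}/dq_rates'][:]
            print(bin_name, rates)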
@@ -0,0 +1,104 @@
#!/usr/bin/python

# Copyright 2024 Max Trevor
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.

"""Combine the data-quality adjusted trigger rates from multiple days."""

import logging
import argparse

import numpy as np
import h5py as h5

import pycbc
from pycbc.io import HFile

parser = argparse.ArgumentParser(description=__doc__)
pycbc.add_common_pycbc_options(parser)
parser.add_argument("--daily-dq-files", nargs="+", required=True,
                    help="Files containing daily dq trigger rates")
parser.add_argument("--ifo", required=True)
parser.add_argument("--output", required=True)
args = parser.parse_args()

pycbc.init_logging(args.verbose)

daily_files = args.daily_dq_files
daily_files.sort()

# need all files to use compatible settings
# get settings hash from the last file
# we will only use files that have the same hash
with HFile(daily_files[-1], 'r') as last_file:
    settings_hash = last_file.attrs['settings_hash']
    bin_str = last_file.attrs['background_bins']
    bank_file = last_file.attrs['bank_file']
    f_lower = last_file.attrs['f_lower']
    dq_thresh = last_file.attrs['dq_thresh']
    dq_channel = last_file.attrs['dq_channel']
    dq_ok_channel = last_file.attrs['dq_ok_channel']
    bin_group = last_file[f'{args.ifo}/bins']
    template_bins = {bin_name: bin_group[bin_name]['tids'][:]
                     for bin_name in bin_group.keys()}
    num_bins = len(bin_str.split())

total_livetime = 0
flagged_livetime = 0
total_triggers = np.zeros(num_bins)
flagged_triggers = np.zeros(num_bins)
for fpath in daily_files:
    with HFile(fpath, 'r') as f:
        if f.attrs['settings_hash'] != settings_hash:
            warning_str = f'File {fpath} has incompatible settings, skipping'
            logging.warning(warning_str)
            continue
        total_livetime += f[f'{args.ifo}/observing_livetime'][()]
        flagged_livetime += f[f'{args.ifo}/dq_flag_livetime'][()]
        bin_group = f[f'{args.ifo}/bins']
        for bin_name in bin_group.keys():
            # bins are named as 'bin{bin_num}'
            bin_num = int(bin_name[3:])
            bgrp = bin_group[bin_name]
            total_triggers[bin_num] += bgrp['total_triggers'][()]
            flagged_triggers[bin_num] += bgrp['dq_triggers'][()]

total_trigger_rate = total_triggers / total_livetime
flag_trigger_rate = flagged_triggers / flagged_livetime
bg_triggers = total_triggers - flagged_triggers
bg_livetime = total_livetime - flagged_livetime
bg_trigger_rate = bg_triggers / bg_livetime

# save results
with h5.File(args.output, 'w') as f:
    ifo_grp = f.create_group(args.ifo)
    all_bin_grp = ifo_grp.create_group('bins')

    for bin_name, bin_tids in template_bins.items():
        bin_grp = all_bin_grp.create_group(bin_name)
        bin_grp['tids'] = bin_tids
        bin_num = int(bin_name[3:])
        # normalize the per-state rates by the bin's mean rate, as in the
        # daily job; use an array so the in-place division broadcasts over
        # both states (a plain list would raise a TypeError)
        bin_trig_rates = np.array([bg_trigger_rate[bin_num],
                                   flag_trigger_rate[bin_num]])
        bin_trig_rates /= total_trigger_rate[bin_num]
        bin_grp['dq_rates'] = bin_trig_rates
        bin_grp['num_triggers'] = total_triggers[bin_num]

    f.attrs['settings_hash'] = settings_hash
    f.attrs['stat'] = f'{args.ifo}-dq_stat_info'
    f.attrs['total_livetime'] = total_livetime
    f.attrs['flagged_livetime'] = flagged_livetime
    f.attrs['dq_thresh'] = dq_thresh
    f.attrs['dq_channel'] = dq_channel
    f.attrs['dq_ok_channel'] = dq_ok_channel
    f.attrs['background_bins'] = bin_str
    f.attrs['bank_file'] = bank_file
    f.attrs['f_lower'] = f_lower
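As a final illustration, here is a minimal sketch of how the combined rates could be consumed. The file name, detector, bin name, and dq state are illustrative assumptions; the group layout matches what the combining script above writes, with 'dq_rates' indexed by dq state (0 = flag off, 1 = flag on):

    # Hypothetical consumer sketch: look up the normalized trigger-rate
    # adjustment for a trigger, given its template bin and dq state.
    import h5py as h5

    ifo = 'H1'         # assumed detector
    bin_name = 'bin0'  # assumed template bin
    dq_state = 1       # assumed: trigger occurred while the dq flag was on

    with h5.File('H1-combined_dq_rates.hdf', 'r') as f:  # assumed file name
        adjustment = f[f'{ifo}/bins/{bin_name}/dq_rates'][dq_state]
        print(f'{bin_name} rate in state {dq_state}: {adjustment:.3f}x mean')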