From 3b694bcaf0e2e6eb7e3ff13c14bd979f2b4170ab Mon Sep 17 00:00:00 2001 From: maxtrevor <65971534+maxtrevor@users.noreply.github.com> Date: Fri, 13 Sep 2024 10:45:51 -0400 Subject: [PATCH] Add dq fitting to Live supervised fitting script (#4866) * Add 'dq_state' to list of 'region_ref_datasets' in trigger collation script * Added method skeletons for dq trigger rate calculations to supervision script * Finish first draft of supervision script * First draft of daily dq trigger rates script * Modify handling of template bins to match offline * Split MDC original date calculation into a separate function * Add pycbc_live_combine_dq_trigger_rates script * Make pycbc_plot_dq_flag_likelihood work with low-latency dq * Accept analysis flag name and frame type as arguments to daily dq job * Make plotting work, and other small fixes * Format variable dq file name * Added option to use pregenerated trigger file * for real this time * Gareth comments * Renamed daily -> collated * daily -> collated inside supervision script * Use deterministic hashing * Try to fix h5py error * Use h5py File instead of PyCBC HFile for outputs * HFile -> h5py for outputs * One more Gareth comment --- bin/all_sky_search/pycbc_bin_templates | 3 +- bin/live/pycbc_live_collate_triggers | 3 +- bin/live/pycbc_live_collated_dq_trigger_rates | 157 +++++++ bin/live/pycbc_live_combine_dq_trigger_rates | 104 +++++ ...pycbc_live_supervise_collated_trigger_fits | 403 +++++++++++++----- bin/plotting/pycbc_plot_dq_flag_likelihood | 43 +- 6 files changed, 598 insertions(+), 115 deletions(-) create mode 100644 bin/live/pycbc_live_collated_dq_trigger_rates create mode 100644 bin/live/pycbc_live_combine_dq_trigger_rates diff --git a/bin/all_sky_search/pycbc_bin_templates b/bin/all_sky_search/pycbc_bin_templates index 807c234adbe..196040f2b38 100755 --- a/bin/all_sky_search/pycbc_bin_templates +++ b/bin/all_sky_search/pycbc_bin_templates @@ -48,7 +48,8 @@ with h5.File(args.output_file, 'w') as f: bin_tids = bin_dict[bin_name] grp = ifo_grp.create_group(bin_name) grp['tids'] = bin_tids + f.attrs['bank_file'] = args.bank_file f.attrs['f_lower'] = args.f_lower - f.attrs['background_bins'] = args.background_bins + f.attrs['background_bins'] = ' '.join(args.background_bins) logging.info('Finished') diff --git a/bin/live/pycbc_live_collate_triggers b/bin/live/pycbc_live_collate_triggers index 5e53352b208..f2dff317f3b 100644 --- a/bin/live/pycbc_live_collate_triggers +++ b/bin/live/pycbc_live_collate_triggers @@ -231,7 +231,6 @@ with h5py.File(args.output_file, 'w') as destination: sorted_key = triggers[key][:][sorted_indices] triggers[key][:] = sorted_key - logging.info("Setting up region references") # Datasets which need region references: region_ref_datasets = ('chisq_dof', 'chisq', 'coa_phase', @@ -239,6 +238,8 @@ with h5py.File(args.output_file, 'w') as destination: 'template_duration', 'sigmasq') if 'psd_var_val' in triggers.keys(): region_ref_datasets += ('psd_var_val',) + if 'dq_state' in triggers.keys(): + region_ref_datasets += ('dq_state',) start_boundaries = template_boundaries end_boundaries = numpy.roll(start_boundaries, -1) diff --git a/bin/live/pycbc_live_collated_dq_trigger_rates b/bin/live/pycbc_live_collated_dq_trigger_rates new file mode 100644 index 00000000000..7906e7bd6d0 --- /dev/null +++ b/bin/live/pycbc_live_collated_dq_trigger_rates @@ -0,0 +1,157 @@ +#!/usr/bin/python + +# Copyright 2024 Max Trevor +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. + +""" +Calculate the noise trigger rate adjustment as a function of data-quality state +for a day of PyCBC Live triggers. +""" + +import logging +import argparse +import hashlib + +import numpy as np +import h5py as h5 + +import pycbc +from pycbc.io import HFile + +import gwdatafind + +from gwpy.timeseries import TimeSeriesDict +from gwpy.segments import Segment, DataQualityFlag + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument("--trigger-file", required=True) +parser.add_argument("--ifo", required=True) +parser.add_argument("--gps-start-time", required=True, type=int) +parser.add_argument("--gps-end-time", required=True, type=int) +parser.add_argument("--template-bin-file", required=True) +parser.add_argument("--analysis-frame-type", required=True) +parser.add_argument("--analysis-flag-name", required=True) +parser.add_argument("--dq-channel", required=True) +parser.add_argument("--dq-ok-channel", required=True) +parser.add_argument("--dq-thresh", required=True, type=float) +parser.add_argument("--output", required=True) +pycbc.add_common_pycbc_options(parser) +args = parser.parse_args() + +pycbc.init_logging(args.verbose) + +# Get observing segs +ar_flag_name = args.analysis_flag_name.format(ifo=args.ifo) +day_seg = Segment(args.gps_start_time, args.gps_end_time) +observing_flag = DataQualityFlag.query(ar_flag_name, day_seg) +observing_segs = observing_flag.active +livetime = observing_flag.livetime +logging.info(f'Found {livetime} seconds of observing time at {args.ifo}.') + +# for each segment, check how much time was dq flagged +flagged_time = 0 +frame_type = args.analysis_frame_type.format(ifo=args.ifo) +dq_channel = args.dq_channel.format(ifo=args.ifo) +dq_ok_channel = args.dq_ok_channel.format(ifo=args.ifo) +for seg in observing_segs: + frames = gwdatafind.find_urls( + args.ifo[0], + frame_type, + seg[0], + seg[1], + ) + tsdict = TimeSeriesDict.read( + frames, + channels=[dq_channel, dq_ok_channel], + start=seg[0], + end=seg[1], + ) + dq_ok_flag = (tsdict[dq_ok_channel] == 1).to_dqflag() + dq_flag = (tsdict[dq_channel] <= args.dq_thresh).to_dqflag() + flagged_time += (dq_flag & dq_ok_flag).livetime +logging.info(f'Found {flagged_time} seconds of dq flagged time at {args.ifo}.') + +bg_livetime = livetime - flagged_time +state_time = np.array([bg_livetime, flagged_time]) + +# read in template bins +logging.info(f'Reading template bins from {args.template_bin_file}') +template_bins = {} +num_templates = 0 +with HFile(args.template_bin_file, 'r') as bin_file: + f_lower = bin_file.attrs['f_lower'] + bank_file = bin_file.attrs['bank_file'] + bin_string = bin_file.attrs['background_bins'] + + # only ever one group in this file + grp = bin_file[list(bin_file.keys())[0]] + num_bins = len(grp.keys()) + for k in grp.keys(): + template_bins[k] = grp[k]['tids'][:] + num_templates += len(template_bins[k]) + +# for each bin, get total number of triggers and number of dq flagged triggers +bin_total_triggers = np.zeros(num_bins) +bin_dq_triggers = np.zeros(num_bins) + +logging.info(f'Reading triggers from {args.trigger_file}') +with HFile(args.trigger_file, 'r') as trigf: + for bin_name in template_bins.keys(): + # bins are named as 'bin{bin_num}' + bin_num = int(bin_name[3:]) + template_ids = template_bins[bin_name] + for template_id in template_ids: + # get dq states for all triggers with this template + # dq state is either 0 or 1 for each trigger + dq_key = trigf[f'{args.ifo}/dq_state_template'][template_id] + dq_states = trigf[f'{args.ifo}/dq_state'][dq_key] + + # update trigger counts + bin_total_triggers[bin_num] += len(dq_states) + bin_dq_triggers[bin_num] += np.sum(dq_states) + +# write outputs to file +logging.info(f'Writing results to {args.output}') +with h5.File(args.output, 'w') as f: + ifo_group = f.create_group(args.ifo) + ifo_group.create_dataset('observing_livetime', data=livetime) + ifo_group.create_dataset('dq_flag_livetime', data=flagged_time) + bin_group = ifo_group.create_group('bins') + for bin_name in template_bins.keys(): + bin_num = int(bin_name[3:]) + bgrp = bin_group.create_group(bin_name) + bgrp.create_dataset('tids', data=template_bins[bin_name]) + bgrp.create_dataset('total_triggers', data=bin_total_triggers[bin_num]) + bgrp.create_dataset('dq_triggers', data=bin_dq_triggers[bin_num]) + + bg_triggers = bin_total_triggers[bin_num] - bin_dq_triggers[bin_num] + num_trigs = np.array([bg_triggers, bin_dq_triggers[bin_num]]) + trig_rates = num_trigs / state_time + mean_rate = bin_total_triggers[bin_num] / livetime + normalized_rates = trig_rates / mean_rate + bgrp.create_dataset('dq_rates', data=normalized_rates) + + f.attrs['dq_thresh'] = args.dq_thresh + f.attrs['dq_channel'] = dq_channel + f.attrs['dq_ok_channel'] = dq_ok_channel + f.attrs['gps_start_time'] = args.gps_start_time + f.attrs['gps_end_time'] = args.gps_end_time + f.attrs['f_lower'] = f_lower + f.attrs['bank_file'] = bank_file + f.attrs['background_bins'] = bin_string + + # hash is used to check if different files have compatible settings + settings_to_hash = [args.dq_thresh, dq_channel, dq_ok_channel, + f_lower, bank_file, bin_string] + setting_str = ' '.join([str(s) for s in settings_to_hash]) + hash_object = hashlib.sha256(setting_str.encode()) + f.attrs['settings_hash'] = hash_object.hexdigest() diff --git a/bin/live/pycbc_live_combine_dq_trigger_rates b/bin/live/pycbc_live_combine_dq_trigger_rates new file mode 100644 index 00000000000..642727f1610 --- /dev/null +++ b/bin/live/pycbc_live_combine_dq_trigger_rates @@ -0,0 +1,104 @@ +#!/usr/bin/python + +# Copyright 2024 Max Trevor +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. + +"""Combine the data-quality adjusted trigger rates from multiple days.""" + +import logging +import argparse + +import numpy as np +import h5py as h5 + +import pycbc +from pycbc.io import HFile + +parser = argparse.ArgumentParser(description=__doc__) +pycbc.add_common_pycbc_options(parser) +parser.add_argument("--daily-dq-files", nargs="+", required=True, + help="Files containing daily dq trigger rates") +parser.add_argument("--ifo", required=True) +parser.add_argument("--output", required=True) +args = parser.parse_args() + +pycbc.init_logging(args.verbose) + +daily_files = args.daily_dq_files +daily_files.sort() + +# need all files to use compatible settings +# get settings hash from the last file +# we will only use files that have the same hash +with HFile(daily_files[-1], 'r') as last_file: + settings_hash = last_file.attrs['settings_hash'] + bin_str = last_file.attrs['background_bins'] + bank_file = last_file.attrs['bank_file'] + f_lower = last_file.attrs['f_lower'] + dq_thresh = last_file.attrs['dq_thresh'] + dq_channel = last_file.attrs['dq_channel'] + dq_ok_channel = last_file.attrs['dq_ok_channel'] + bin_group = last_file[f'{args.ifo}/bins'] + template_bins = {bin_name: bin_group[bin_name]['tids'][:] + for bin_name in bin_group.keys()} +num_bins = len(bin_str.split()) + +total_livetime = 0 +flagged_livetime = 0 +total_triggers = np.zeros(num_bins) +flagged_triggers = np.zeros(num_bins) +for fpath in daily_files: + with HFile(fpath, 'r') as f: + if f.attrs['settings_hash'] != settings_hash: + warning_str = f'File {fpath} has incompatible settings, skipping' + logging.warning(warning_str) + continue + total_livetime += f[f'{args.ifo}/observing_livetime'][()] + flagged_livetime += f[f'{args.ifo}/dq_flag_livetime'][()] + bin_group = f[f'{args.ifo}/bins'] + for bin_name in bin_group.keys(): + # bins are named as 'bin{bin_num}' + bin_num = int(bin_name[3:]) + bgrp = bin_group[bin_name] + total_triggers[bin_num] += bgrp['total_triggers'][()] + flagged_triggers[bin_num] += bgrp['dq_triggers'][()] + +total_trigger_rate = total_triggers / total_livetime +flag_trigger_rate = flagged_triggers / flagged_livetime +bg_triggers = total_triggers - flagged_triggers +bg_livetime = total_livetime - flagged_livetime +bg_trigger_rate = bg_triggers / bg_livetime + +# save results +with h5.File(args.output, 'w') as f: + ifo_grp = f.create_group(args.ifo) + all_bin_grp = ifo_grp.create_group('bins') + + for bin_name, bin_tids in template_bins.items(): + bin_grp = all_bin_grp.create_group(bin_name) + bin_grp['tids'] = bin_tids + bin_num = int(bin_name[3:]) + bin_trig_rates = [bg_trigger_rate[bin_num], flag_trigger_rate[bin_num]] + bin_trig_rates /= total_trigger_rate[bin_num] + bin_grp['dq_rates'] = bin_trig_rates + bin_grp['num_triggers'] = total_triggers[bin_num] + + f.attrs['settings_hash'] = settings_hash + f.attrs['stat'] = f'{args.ifo}-dq_stat_info' + f.attrs['total_livetime'] = total_livetime + f.attrs['flagged_livetime'] = flagged_livetime + f.attrs['dq_thresh'] = dq_thresh + f.attrs['dq_channel'] = dq_channel + f.attrs['dq_ok_channel'] = dq_ok_channel + f.attrs['background_bins'] = bin_str + f.attrs['bank_file'] = bank_file + f.attrs['f_lower'] = f_lower diff --git a/bin/live/pycbc_live_supervise_collated_trigger_fits b/bin/live/pycbc_live_supervise_collated_trigger_fits index 087ae5d1339..178f5398db5 100755 --- a/bin/live/pycbc_live_supervise_collated_trigger_fits +++ b/bin/live/pycbc_live_supervise_collated_trigger_fits @@ -7,20 +7,21 @@ and the associated plots. import re import logging import argparse -from datetime import datetime, timedelta -from dateutil.relativedelta import relativedelta -import os import shutil -import subprocess +import os + import numpy as np +from datetime import datetime, timedelta + +import lal from lal import gpstime import pycbc -from pycbc.io.hdf import HFile from pycbc.live import supervision as sv from pycbc.types.config import InterpolatingConfigParser as icp + def read_options(args): """ read the options into a dictionary @@ -34,6 +35,45 @@ def read_options(args): del config_opts['environment'] return config_opts + +def get_true_date(replay_day_dt, controls): + if 'replay-start-time' not in controls: + return replay_day_dt + + replay_start_time = int(controls['replay-start-time']) + true_start_time = int(controls['true-start-time']) + replay_duration = int(controls['replay-duration']) + rep_start_utc = lal.GPSToUTC(replay_start_time)[0:6] + + dt_replay_start = datetime( + year=rep_start_utc[0], + month=rep_start_utc[1], + day=rep_start_utc[2], + hour=rep_start_utc[3], + minute=rep_start_utc[4], + second=rep_start_utc[5] + ) + + td = (replay_day_dt - dt_replay_start).total_seconds() + + # Time since the start of this replay + time_since_replay = np.remainder(td, replay_duration) + + # Add this on to the original start time to get the current time of + # the replay data + true_utc = lal.GPSToUTC(true_start_time)[0:6] + dt_true_start = datetime( + year=true_utc[0], + month=true_utc[1], + day=true_utc[2], + hour=true_utc[3], + minute=true_utc[4], + second=true_utc[5] + ) + # Original time of the data being replayed right now + return dt_true_start + timedelta(seconds=time_since_replay) + + def trigger_collation( day_dt, day_str, @@ -41,7 +81,7 @@ def trigger_collation( collation_options, output_dir, controls - ): +): """ Perform the trigger collation as specified """ @@ -80,7 +120,7 @@ def fit_by_template( output_dir, ifo, controls - ): +): """ Supervise the running of pycbc_fit_sngls_by_template on live triggers """ @@ -99,12 +139,12 @@ def fit_by_template( return fbt_out_full, day_str -def find_daily_fit_files( +def find_daily_files( combined_control_options, daily_fname_format, daily_files_dir, ifo=None - ): +): """ Find files which match the specified formats """ @@ -113,41 +153,8 @@ def find_daily_fit_files( log_str += f"in detector {ifo}" logging.info(log_str) combined_days = int(combined_control_options['combined-days']) - if 'replay-start-time' in combined_control_options: - replay_start_time = int(combined_control_options['replay-start-time']) - true_start_time = int(combined_control_options['true-start-time']) - replay_duration = int(combined_control_options['replay-duration']) - rep_start_utc = lal.GPSToUTC(replay_start_time)[0:6] - - dt_replay_start = datetime( - year=rep_start_utc[0], - month=rep_start_utc[1], - day=rep_start_utc[2], - hour=rep_start_utc[3], - minute=rep_start_utc[4], - second=rep_start_utc[5] - ) - td = (day_dt - dt_replay_start).total_seconds() - - # Time since the start of this replay - time_since_replay = np.remainder(td, replay_duration) - - # Add this on to the original start time to get the current time of - # the replay data - true_utc = lal.GPSToUTC(true_start_time)[0:6] - dt_true_start = datetime( - year=true_utc[0], - month=true_utc[1], - day=true_utc[2], - hour=true_utc[3], - minute=true_utc[4], - second=true_utc[5] - ) - # Original time of the data being replayed right now - current_date = dt_true_start + timedelta(seconds=time_since_replay) - else: - current_date = day_dt + current_date = get_true_date(day_dt, combined_control_options) date_test = current_date + timedelta(days=1) @@ -207,15 +214,14 @@ def fit_over_multiparam( fit_over_controls, fit_over_options, ifo, - day_str, output_dir, controls - ): +): """ Supervise the smoothing of live trigger fits using pycbc_fit_sngls_over_multiparam """ - daily_files, first_date, end_date = find_daily_fit_files( + daily_files, first_date, end_date = find_daily_files( fit_over_controls, fit_over_controls['fit-by-format'], controls['output-directory'], @@ -226,11 +232,6 @@ def fit_over_multiparam( "specified parameters", len(daily_files) ) - logging.info( - "Smoothing fits using fit_over_multiparam with %d files and " - "specified parameters", - len(daily_files) - ) file_id_str = f'{first_date}-{end_date}' out_fname = fit_over_controls['fit-over-format'].format( dates=file_id_str, @@ -247,10 +248,11 @@ def fit_over_multiparam( variable_fits = fit_over_controls['variable-fit-over-param'].format( ifo=ifo ) - sv.symlink(fit_over_full, variable_fits) + shutil.copyfile(fit_over_full, variable_fits) return fit_over_full, file_id_str + def plot_fits( fits_file, ifo, @@ -258,7 +260,7 @@ def plot_fits( plot_fit_options, controls, smoothed=False - ): +): """Plotting for fit_by files, and linking to the public directory""" fits_plot_output = fits_file[:-3] + 'png' logging.info( @@ -279,7 +281,7 @@ def plot_fits( ifo, day_title_str ) - if smoothed == True: + if smoothed: title += ', smoothed' fits_plot_arguments += ['--title', title] sv.run_and_error(fits_plot_arguments, controls) @@ -292,21 +294,21 @@ def plot_fits( def single_significance_fits( - daily_controls, - daily_options, + daily_fit_controls, + daily_fit_options, output_dir, day_str, day_dt, controls, stat_files=None, - ): +): """ Supervise the significance fits for live triggers using pycbc_live_single_significance_fits """ - daily_options['output'] = os.path.join( + daily_fit_options['output'] = os.path.join( output_dir, - daily_controls['sig-daily-format'].format( + daily_fit_controls['sig-daily-format'].format( ifos=''.join(sorted(controls['ifos'].split())), date=day_str ), @@ -315,15 +317,16 @@ def single_significance_fits( gps_start_time = gpstime.utc_to_gps(day_dt).gpsSeconds gps_end_time = gpstime.utc_to_gps(day_dt + timedelta(days=1)).gpsSeconds - daily_options['gps-start-time'] = f'{gps_start_time:d}' - daily_options['gps-end-time'] = f'{gps_end_time:d}' - daily_args += sv.dict_to_args(daily_options) + daily_fit_options['gps-start-time'] = f'{gps_start_time:d}' + daily_fit_options['gps-end-time'] = f'{gps_end_time:d}' + daily_args += sv.dict_to_args(daily_fit_options) if stat_files is not None: daily_args += ['--statistic-files'] + stat_files sv.run_and_error(daily_args, controls) - return daily_options['output'] + return daily_fit_options['output'] + def plot_single_significance_fits(daily_output, daily_plot_options, controls): """ @@ -365,21 +368,21 @@ def plot_single_significance_fits(daily_output, daily_plot_options, controls): def combine_significance_fits( - combined_options, - combined_controls, + combined_fit_options, + combined_fit_controls, output_dir, day_str, controls - ): +): """ Supervise the smoothing of live trigger significance fits using pycbc_live_combine_single_significance_fits """ # This has a trick to do partial formatting, get the IFOs into the # string, but not the date - daily_files, first_date, end_date = find_daily_fit_files( - combined_controls, - combined_controls['daily-format'].format( + daily_files, first_date, end_date = find_daily_files( + combined_fit_controls, + combined_fit_controls['daily-format'].format( ifos=''.join(sorted(controls['ifos'].split())), date='{date}' ), @@ -390,26 +393,27 @@ def combine_significance_fits( len(daily_files) ) date_range = f'{first_date}-{end_date}' - outfile_name = combined_controls['outfile-format'].format( + outfile_name = combined_fit_controls['outfile-format'].format( date=day_str, date_range=date_range, ) - combined_options['output'] = os.path.join(output_dir, outfile_name) - combined_options['trfits-files'] = ' '.join(daily_files) + combined_fit_options['output'] = os.path.join(output_dir, outfile_name) + combined_fit_options['trfits-files'] = ' '.join(daily_files) - combined_args = ['pycbc_live_combine_single_significance_fits'] - combined_args += sv.dict_to_args(combined_options) + combined_fit_args = ['pycbc_live_combine_single_significance_fits'] + combined_fit_args += sv.dict_to_args(combined_fit_options) - sv.run_and_error(combined_args, controls) + sv.run_and_error(combined_fit_args, controls) - if 'variable-significance-fits' in combined_controls: + if 'variable-significance-fits' in combined_fit_controls: logging.info("Linking to variable significance fits file") - sv.symlink( - combined_options['output'], - combined_controls['variable-significance-fits'] + shutil.copyfile( + combined_fit_options['output'], + combined_fit_controls['variable-significance-fits'] ) - return combined_options['output'], date_range + return combined_fit_options['output'], date_range + def plot_combined_significance_fits( csf_file, @@ -418,13 +422,13 @@ def plot_combined_significance_fits( combined_plot_options, combined_plot_control_options, controls - ): +): """ Plotting combined significance fits, and link to public directory if wanted """ oput_fmt = combined_plot_control_options['output-plot-name-format'] - if not '{date_range}' in oput_fmt: + if '{date_range}' not in oput_fmt: raise RuntimeError( "Must specify {date_range} in output-plot-name-format" ) @@ -460,6 +464,138 @@ def plot_combined_significance_fits( for cpo in combined_plot_outputs: sv.symlink(cpo, public_dir) + +def daily_dq_trigger_rates( + trigger_merge_file, + day_str, + day_dt, + daily_dq_controls, + daily_dq_options, + output_dir, + ifo, + controls +): + """ + For a given day, record the number of idq flagged triggers + in each dq bin, total triggers in each dq bin, the amount + of time flagged by idq and the total observing time. + """ + logging.info(f"Calculating daily dq trigger rates for {ifo}") + fname_format = daily_dq_controls['daily-dq-format'] + ddtr_out_fname = fname_format.format(date=day_str, ifo=ifo) + + today = get_true_date(day_dt, daily_dq_controls) + + gps_start_time = gpstime.utc_to_gps(today).gpsSeconds + gps_end_time = gpstime.utc_to_gps(today + timedelta(days=1)).gpsSeconds + + ddtr_out_full = os.path.join(output_dir, ddtr_out_fname) + daily_dq_args = ['pycbc_live_collated_dq_trigger_rates'] + daily_dq_args += ['--trigger-file', trigger_merge_file] + daily_dq_args += ['--output', ddtr_out_full] + daily_dq_args += ['--ifo', ifo] + + daily_dq_options['gps-start-time'] = f'{gps_start_time:d}' + daily_dq_options['gps-end-time'] = f'{gps_end_time:d}' + daily_dq_args += sv.dict_to_args(daily_dq_options) + + sv.run_and_error(daily_dq_args, controls) + + return ddtr_out_full + + +def combine_dq_trigger_rates( + combined_dq_controls, + combined_dq_options, + ifo, + output_dir, + controls +): + """ + Calculate the dq trigger rate adjustment from multiple days of data + """ + daily_files, first_date, end_date = find_daily_files( + combined_dq_controls, + combined_dq_controls['daily-dq-format'], + controls['output-directory'], + ifo + ) + + date_range = f'{first_date}-{end_date}' + outfile_name = combined_dq_controls['combined-dq-format'].format( + ifo=ifo, + dates=date_range, + ) + + logging.info( + f"Combining {ifo} dq trigger rates over {len(daily_files)} files " + f"from {date_range}" + ) + + combined_dq_options['output'] = os.path.join(output_dir, outfile_name) + combined_dq_options['daily-dq-files'] = ' '.join(daily_files) + combined_dq_options['ifo'] = ifo + + combine_dq_args = ['pycbc_live_combine_dq_trigger_rates'] + combine_dq_args += sv.dict_to_args(combined_dq_options) + + sv.run_and_error(combine_dq_args, controls) + + if 'variable-dq-trigger-rates' in combined_dq_controls: + logging.info("Linking to variable significance fits file") + + shutil.copyfile( + combined_dq_options['output'], + combined_dq_controls['variable-dq-trigger-rates'].format( + ifo=ifo + ) + ) + + return combined_dq_options['output'], date_range + + +def plot_dq_trigger_rates( + dq_file, + ifo, + date_str, + plot_dq_options, + controls +): + """ + Plotting for dq trigger rate files, and linking to the public directory + """ + dq_plot_output = dq_file[:-3] + 'png' + logging.info( + "Plotting dq trigger rates %s to %s", + dq_file, + dq_plot_output + ) + dq_plot_arguments = [ + 'pycbc_plot_dq_flag_likelihood', + '--dq-file', + dq_file, + '--output-file', + dq_plot_output, + '--ifo', + ifo, + ] + dq_plot_arguments += sv.dict_to_args(plot_dq_options) + + title = "Trigger rate adjustment for iDQ flagged times from {}, {}".format( + ifo, + date_str + ) + dq_plot_arguments += ['--title', title] + + sv.run_and_error(dq_plot_arguments, controls) + if 'public-dir' in controls: + public_dir = os.path.abspath(os.path.join( + controls['public-dir'], + *day_str.split('_') + )) + sv.symlink(dq_plot_output, public_dir) + + def supervise_collation_fits_dq(args, day_dt, day_str): """ Perform the trigger collation and fits etc. as specified @@ -474,19 +610,23 @@ def supervise_collation_fits_dq(args, day_dt, day_str): fit_over_options = config_opts['fit_over_multiparam'] fit_over_control_options = config_opts['fit_over_multiparam_control'] plot_fit_options = config_opts['plot_fit'] - daily_options = config_opts['significance_daily_fits'] - daily_control_options = config_opts['significance_daily_fits_control'] - daily_plot_options = config_opts['plot_significance_daily'] - combined_options = config_opts['significance_combined_fits'] + daily_significance_options = config_opts['significance_daily_fits'] + daily_significance_control_options = config_opts['significance_daily_fits_control'] + daily_significance_plot_options = config_opts['plot_significance_daily'] + combined_significance_fit_options = config_opts['significance_combined_fits'] combined_control_options = config_opts['significance_combined_fits_control'] combined_plot_options = config_opts['plot_significance_combined'] combined_plot_control_options = config_opts['plot_significance_combined_control'] + daily_dq_options = config_opts['daily_dq_trigger_rates'] + daily_dq_control_options = config_opts['daily_dq_trigger_rates_control'] + combined_dq_options = config_opts['combined_dq_trigger_rates'] + combined_dq_control_options = config_opts['combined_dq_trigger_rates_control'] + plot_dq_options = config_opts['plot_dq_trigger_rates'] # The main output directory will have a date subdirectory which we # put the output into sv.ensure_directories(controls, day_str) - ifos = controls['ifos'].split() output_dir = os.path.join( controls['output-directory'], day_str @@ -499,18 +639,26 @@ def supervise_collation_fits_dq(args, day_dt, day_str): )) logging.info("Outputs to be linked to % ", public_dir) - merged_triggers = trigger_collation( - day_dt, - day_str, - collation_control_options, - collation_options, - output_dir, - controls - ) + if args.collated_trigger_file is not None: + logging.info( + "Using collated trigger file %s", + args.collated_trigger_file + ) + merged_triggers = args.collated_trigger_file + else: + merged_triggers = trigger_collation( + day_dt, + day_str, + collation_control_options, + collation_options, + output_dir, + controls + ) # Store the locations of files needed for the statistic stat_files = [] for ifo in config_opts['control']['ifos'].split(): if args.fit_by_template: + # compute and plot daily template fits fbt_file, date_str = fit_by_template( merged_triggers, day_str, @@ -528,12 +676,33 @@ def supervise_collation_fits_dq(args, day_dt, day_str): controls ) + if args.daily_dq_trigger_rates: + # compute and plot daily dq trigger rates + ddtr_file = daily_dq_trigger_rates( + merged_triggers, + day_str, + day_dt, + daily_dq_control_options, + daily_dq_options, + output_dir, + ifo, + controls + ) + plot_dq_trigger_rates( + ddtr_file, + ifo, + day_str, + plot_dq_options, + controls + ) + if args.fit_over_multiparam: + # compute and plot template fits smoothed over parameter space + # and combining multiple days of triggers fom_file, date_str = fit_over_multiparam( fit_over_control_options, fit_over_options, ifo, - day_str, output_dir, controls ) @@ -547,10 +716,28 @@ def supervise_collation_fits_dq(args, day_dt, day_str): smoothed=True, ) + if args.combine_dq_trigger_rates: + # combine the dq trigger rates from multiple days + cdtr_file, dq_date_str = combine_dq_trigger_rates( + combined_dq_control_options, + combined_dq_options, + ifo, + output_dir, + controls + ) + stat_files.append(cdtr_file) + plot_dq_trigger_rates( + cdtr_file, + ifo, + dq_date_str, + plot_dq_options, + controls + ) + if args.single_significance_fits: ssf_file = single_significance_fits( - daily_control_options, - daily_options, + daily_significance_control_options, + daily_significance_options, output_dir, day_str, day_dt, @@ -559,12 +746,12 @@ def supervise_collation_fits_dq(args, day_dt, day_str): ) plot_single_significance_fits( ssf_file, - daily_plot_options, + daily_significance_plot_options, controls ) if args.combine_significance_fits: csf_file, date_str = combine_significance_fits( - combined_options, + combined_significance_fit_options, combined_control_options, output_dir, date_str, @@ -587,6 +774,7 @@ def get_yesterday_date(): day_str = day_dt.strftime('%Y_%m_%d') return day_dt, day_str + parser = argparse.ArgumentParser(description=__doc__) pycbc.add_common_pycbc_options(parser) parser.add_argument( @@ -598,6 +786,11 @@ parser.add_argument( help='Date to analyse, if not given, will analyse yesterday (UTC). ' 'Format YYYY_MM_DD. Do not use if using --run-daily-at.' ) +parser.add_argument( + '--collated-trigger-file', + help='Collated trigger file to use, if not given, will be created. ' + 'Useful for testing.' +) parser.add_argument( '--fit-by-template', action='store_true', @@ -618,6 +811,16 @@ parser.add_argument( action='store_true', help="Do combination of singles significance fits." ) +parser.add_argument( + '--daily-dq-trigger-rates', + action='store_true', + help="Calculate daily dq trigger rates." +) +parser.add_argument( + '--combine-dq-trigger-rates', + action='store_true', + help="Combine dq trigger rates over multiple days." +) parser.add_argument( '--run-daily-at', metavar='HH:MM:SS', diff --git a/bin/plotting/pycbc_plot_dq_flag_likelihood b/bin/plotting/pycbc_plot_dq_flag_likelihood index 2e8a483d4fd..f8d459d02b6 100644 --- a/bin/plotting/pycbc_plot_dq_flag_likelihood +++ b/bin/plotting/pycbc_plot_dq_flag_likelihood @@ -15,9 +15,16 @@ from pycbc.io.hdf import HFile parser = argparse.ArgumentParser(description=__doc__) pycbc.add_common_pycbc_options(parser) parser.add_argument("--dq-file", required=True) -parser.add_argument("--dq-label", required=True) parser.add_argument("--ifo", type=str, required=True) parser.add_argument("--output-file", required=True) +parser.add_argument("--low-latency", action='store_true') +title_grp = parser.add_mutually_exclusive_group(required=True) +title_grp.add_argument("--dq-label", type=str, + help="Name of dq flag. Used in plot title," + " mutually exclusive with --title") +title_grp.add_argument("--title", type=str, + help="Title of plot, mutually exclusive with --dq-label") + args = parser.parse_args() pycbc.init_logging(args.verbose) @@ -27,18 +34,25 @@ ifo = args.ifo f = HFile(args.dq_file, 'r') ifo_grp = f[ifo] -dq_states = ifo_grp['dq_segments'].keys() bin_names = ifo_grp['bins'].keys() -num_bins = len(bin_names) -x = numpy.arange(num_bins) -width = 0.25 - -dq_states = { - 0: 'Clean', - 1: 'DQ Flag', - 2: 'Autogating', -} +x = numpy.arange(len(bin_names)) + +if args.low_latency: + dq_states = { + 0: 'Clean', + 1: 'DQ Flag', + } + x_shift = 0.5 + width = 0.33 +else: + dq_states = { + 0: 'Clean', + 1: 'DQ Flag', + 2: 'Autogating', + } + x_shift = 1 + width = 0.25 colors = { 0: 'green', @@ -59,7 +73,7 @@ for n, dqstate_name in dq_states.items(): ymax = max(ymax, numpy.max(dq_rates)) - offset = width * (n - 1) + offset = width * (n - x_shift) ax.bar(x + offset, dq_rates, width, label=dqstate_name, color=colors[n]) ymax = ymax**1.05 @@ -76,7 +90,10 @@ ax2.set_ylabel('DQ Log Likelihood Penalty') ax2.set_ylim(numpy.log(ymin), numpy.log(ymax)) # add meta data and save figure -plot_title = f'{ifo}:{args.dq_label} DQ Trigger Rates' +if args.title is not None: + plot_title = args.title +else: + plot_title = f'{ifo}:{args.dq_label} DQ Trigger Rates' ax.set_title(plot_title)