Skip to content

Commit

Permalink
Use HFile instead of h5py.File in most places (gwastro#4792)
Browse files Browse the repository at this point in the history
* Move to HFile in pycbc_add_statmap

* Move to HFile in pycbc_apply_rerank

* update all-sky psd code

* Move pycbc_coinc_(findtrigs,hdfinjfind,mergetrigs) to HFile

* More places with HFile

* More moves

* Use HFile in more places

* Use HFile in more places

* Use HFile in more places

* Use HFile in more places

* Use HFile in more places

* Use HFile rather than h5py.File in bank

* move live executables to use HFile

* Use HFile in live examples

* Move minifollowups to use HFile

* Move populations to use HFile

* Move banksim codes to use HFile

* Start to move plotting codes to use HFile

* Finish moving plotting codes to use HFile

* Move scripts in /bin/ to use HFile

* Move bin/workflow_comparisons/offline_search codes to use HFile

* Use HFile in pycbc/events

* Moving more parts of pycbc/ to use HFile

* Use HFile for other things in pycbc/io/hdf

* Use HFile in pygrb codes

* Revert "Use HFile in pycbc/events"

This reverts commit 99dce9d.

* Missed removal of imports

* Remove HFile from types/array

* Remove HFile from pycbc/types
  • Loading branch information
GarethCabournDavies authored and yi-fan-wang committed Jul 15, 2024
1 parent 4e6ad42 commit 0920d11
Show file tree
Hide file tree
Showing 128 changed files with 394 additions and 373 deletions.
9 changes: 5 additions & 4 deletions bin/all_sky_search/pycbc_add_statmap
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
with more than one ifo combination available. Cluster to keep foreground
coincs with the highest stat value.
"""
import h5py, numpy as np, argparse, logging, pycbc, pycbc.events, pycbc.io
import numpy as np, argparse, logging, pycbc, pycbc.events, pycbc.io
import pycbc.version
import pycbc.conversions as conv
from pycbc.events import coinc, significance
Expand Down Expand Up @@ -91,9 +91,9 @@ else :

pycbc.init_logging(args.verbose)

files = [h5py.File(n, 'r') for n in args.statmap_files]
files = [pycbc.io.HFile(n, 'r') for n in args.statmap_files]

f = h5py.File(args.output_file, "w")
f = pycbc.io.HFile(args.output_file, "w")

# Work out the combinations of detectors used by each input file
all_ifo_combos = []
Expand Down Expand Up @@ -308,7 +308,7 @@ if injection_style:
# if background files are provided, this is being used for injections
# use provided background files to calculate the FARs
for bg_fname in args.background_files:
bg_f = h5py.File(bg_fname, 'r')
bg_f = pycbc.io.HFile(bg_fname, 'r')
ifo_combo_key = bg_f.attrs['ifos'].replace(' ','')
_, far[ifo_combo_key] = significance.get_far(
bg_f['background/stat'][:],
Expand All @@ -324,6 +324,7 @@ if injection_style:
bg_f['background_exc/decimation_factor'][:],
bg_f.attrs['background_time_exc'],
**significance_dict[ifo_combo_key])
bg_f.close()
else:
# if not injection style input files, then the input files will have the
# background included
Expand Down
11 changes: 6 additions & 5 deletions bin/all_sky_search/pycbc_apply_rerank
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
"""Rewrite statmap file and rerank candidates using the statistic values
generated from the followup of candidates.
"""
import h5py, numpy, argparse, logging, pycbc
import numpy, argparse, logging, pycbc
from pycbc.io import HFile
from pycbc.conversions import sec_to_year
from pycbc.events import significance
from shutil import copyfile
Expand All @@ -29,7 +30,7 @@ pycbc.init_logging(args.verbose)
significance.check_significance_options(args, parser)

# Reconstruct the full set of statistic values for our candidates
f = h5py.File(args.followup_file, 'r')
f = HFile(args.followup_file, 'r')
num = len(f['offsets']) # Number of followups done

# Mapping between the followups done and the original candidate list
Expand All @@ -42,7 +43,7 @@ sections = f.attrs['sections']
values = []
starts = []
for fname in args.stat_files:
f = h5py.File(fname, 'r')
f = HFile(fname, 'r')
s = f.attrs['start_index']
v = f['stat'][:]
stride = f.attrs['stride']
Expand All @@ -52,7 +53,7 @@ stats = stats[inv]
# copy statmap file to output since we'll
# only make a few modifications
copyfile(args.statmap_file, args.output_file)
o = h5py.File(args.output_file)
o = HFile(args.output_file)

ifo_combo = o.attrs['ifos'].replace(' ','')

Expand All @@ -74,7 +75,7 @@ background_time_exc = o.attrs['background_time_exc']

# Injection run
if args.ranking_file:
f = h5py.File(args.ranking_file, 'r')
f = HFile(args.ranking_file, 'r')
fstat = o['foreground/stat'][:]
backstat = f['background_exc/stat'][:]
dec = f['background_exc/decimation_factor'][:]
Expand Down
4 changes: 2 additions & 2 deletions bin/all_sky_search/pycbc_average_psd
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ the harmonic mean."""
import logging
import argparse
import numpy as np
import h5py
import pycbc
from pycbc.io import HFile
from pycbc.version import git_verbose_msg as version
from pycbc.types import MultiDetOptionAction, FrequencySeries

Expand Down Expand Up @@ -54,7 +54,7 @@ delta_f = None

for input_file in args.input_files:
logging.info('Reading %s', input_file)
f = h5py.File(input_file, 'r')
f = HFile(input_file, 'r')
ifo = tuple(f.keys())[0]
df = f[ifo + '/psds/0'].attrs['delta_f']
if delta_f is None:
Expand Down
5 changes: 3 additions & 2 deletions bin/all_sky_search/pycbc_calculate_psd
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
#!/usr/bin/env python
""" Calculate psd estimates for analysis segments
"""
import logging, argparse, numpy, h5py, multiprocessing, time, copy
import logging, argparse, numpy, multiprocessing, time, copy
from six.moves import zip_longest
import pycbc, pycbc.psd, pycbc.strain, pycbc.events
from pycbc.io import HFile
from pycbc.version import git_verbose_msg as version
from pycbc.fft.fftw import set_measure_level
from pycbc.workflow import resolve_td_option
Expand Down Expand Up @@ -109,7 +110,7 @@ else:
psds = []

# Store the PSDs in an hdf file, include some basic metadata
f = h5py.File(args.output_file, 'w')
f = HFile(args.output_file, 'w')
psd_group = f.create_group(ifo + '/psds')
inc, start, end = 0, [], []
for gpsd in psds:
Expand Down
7 changes: 4 additions & 3 deletions bin/all_sky_search/pycbc_coinc_findtrigs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
#!/usr/bin/env python
import h5py, copy, argparse, logging, numpy, numpy.random
import copy, argparse, logging, numpy, numpy.random
import shutil, uuid, os.path, atexit
from ligo.segments import infinity
from pycbc.events import veto, coinc, stat, ranking, cuts
import pycbc.version
from pycbc.io import HFile
from pycbc import pool, init_logging
from numpy.random import seed, shuffle
from pycbc.io.hdf import ReadByTemplate
Expand Down Expand Up @@ -91,7 +92,7 @@ logging.info('Starting...')

trigger_cut_dict, template_cut_dict = cuts.ingest_cuts_option_group(args)

num_templates = len(h5py.File(args.template_bank, "r")['template_hash'])
num_templates = len(HFile(args.template_bank, "r")['template_hash'])
tmin, tmax = parse_template_range(num_templates, args.template_fraction_range)
logging.info('Analyzing template %s - %s' % (tmin, tmax-1))

Expand Down Expand Up @@ -510,7 +511,7 @@ if args.cluster_window and len(data['stat']) > 0:
args.cluster_window)

logging.info('saving coincident triggers')
f = h5py.File(args.output_file, 'w')
f = HFile(args.output_file, 'w')
if len(data['stat']) > 0:
for key in data:
var = data[key][cid] if args.cluster_window else data[key]
Expand Down
9 changes: 5 additions & 4 deletions bin/all_sky_search/pycbc_coinc_hdfinjfind
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
files.
"""

import argparse, h5py, logging, types, numpy, os.path
import argparse, logging, types, numpy, os.path
from ligo.lw import lsctables, utils as ligolw_utils
from ligo import segments
from pycbc import events, init_logging
from pycbc.events import indices_within_segments
from pycbc.types import MultiDetOptionAction
from pycbc.inject import CBCHDFInjectionSet
from pycbc.io import HFile
import pycbc.version
from pycbc.io.ligolw import LIGOLWContentHandler

Expand All @@ -22,7 +23,7 @@ def hdf_append(f, key, value):
else:
f[key] = value

h5py.File.append = types.MethodType(hdf_append, h5py.File)
HFile.append = types.MethodType(hdf_append, HFile)

def keep_ind(times, start, end):
""" Return the list of indices within the list of start and end times
Expand Down Expand Up @@ -79,13 +80,13 @@ args = parser.parse_args()

init_logging(args.verbose)

fo = h5py.File(args.output_file, 'w')
fo = HFile(args.output_file, 'w')

injection_index = 0
for trigger_file, injection_file in zip(args.trigger_files,
args.injection_files):
logging.info('Read in the coinc data: %s' % trigger_file)
f = h5py.File(trigger_file, 'r')
f = HFile(trigger_file, 'r')

# Get list of groups which contain subgroup 'time'
# - these will be the IFOs
Expand Down
11 changes: 6 additions & 5 deletions bin/all_sky_search/pycbc_coinc_mergetrigs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import numpy, argparse, h5py, logging
import pycbc.version
from pycbc.io import HFile
from pycbc import init_logging

def changes(arr):
Expand All @@ -15,7 +16,7 @@ def changes(arr):
def collect(key, files):
data = []
for fname in files:
with h5py.File(fname, 'r') as fin:
with HFile(fname, 'r') as fin:
if key in fin:
data += [fin[key][:]]
return numpy.concatenate(data)
Expand Down Expand Up @@ -43,13 +44,13 @@ args = parser.parse_args()

init_logging(args.verbose)

f = h5py.File(args.output_file, 'w')
f = HFile(args.output_file, 'w')

logging.info("getting the list of columns from a representative file")
trigger_columns = []
for fname in args.trigger_files:
try:
f2 = h5py.File(fname, 'r')
f2 = HFile(fname, 'r')
except IOError as e:
logging.error("Cannot open %s" % fname)
raise e
Expand Down Expand Up @@ -83,7 +84,7 @@ ends = numpy.array([], dtype=numpy.float64)
gating = {}
for filename in args.trigger_files:
try:
data = h5py.File(filename, 'r')
data = HFile(filename, 'r')
except IOError as e:
logging.error('Cannot open %s', filename)
raise e
Expand Down Expand Up @@ -134,7 +135,7 @@ for gk, gv in gating.items():

logging.info('set up sorting of triggers and template ids')
# For fast lookup we need the templates in hash order
hashes = h5py.File(args.bank_file, 'r')['template_hash'][:]
hashes = HFile(args.bank_file, 'r')['template_hash'][:]
bank_tids = hashes.argsort()
unsort = bank_tids.argsort()
hashes = hashes[bank_tids]
Expand Down
4 changes: 2 additions & 2 deletions bin/all_sky_search/pycbc_coinc_statmap
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ the capability of doing hierarchical removal of foreground triggers that are
louder than all of the background triggers. We use this to properly assess
the FANs of any other gravitational waves in the dataset.
"""
import argparse, h5py, itertools
import argparse, itertools
import lal, logging, numpy
from pycbc.events import veto, coinc, significance
import pycbc.version, pycbc.pnutils, pycbc.io
Expand All @@ -16,7 +16,7 @@ import pycbc.conversions as conv

class fw(object):
def __init__(self, name):
self.f = h5py.File(name, 'w')
self.f = pycbc.io.HFile(name, 'w')
self.attrs = self.f.attrs

def __setitem__(self, name, data):
Expand Down
6 changes: 3 additions & 3 deletions bin/all_sky_search/pycbc_coinc_statmap_inj
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ The program combines coincident output files generated
by pycbc_coinc_findtrigs to generate a mapping between SNR and FAP, along
with producing the combined foreground and background triggers
"""
import argparse, h5py, logging, itertools, copy, pycbc.io, numpy, lal
import argparse, logging, itertools, copy, pycbc.io, numpy, lal
from pycbc.events import veto, coinc, significance
import pycbc.version
import pycbc.conversions as conv
Expand Down Expand Up @@ -53,7 +53,7 @@ significance_dict = significance.digest_significance_options([ifo_key], args)

zdata = zdata.cluster(window)

f = h5py.File(args.output_file, "w")
f = pycbc.io.HFile(args.output_file, "w")

f.attrs['num_of_ifos'] = zdata.attrs['num_of_ifos']
f.attrs['pivot'] = zdata.attrs['pivot']
Expand All @@ -75,7 +75,7 @@ else:
f['foreground/%s' % key] = numpy.array([], dtype=zdata.data[key].dtype)

logging.info('calculating statistics excluding zerolag')
fb = h5py.File(args.full_data_background, "r")
fb = pycbc.io.HFile(args.full_data_background, "r")

# we expect the injfull file to contain injection data as pivot
# and fullinj to contain full data as pivot
Expand Down
6 changes: 3 additions & 3 deletions bin/all_sky_search/pycbc_combine_coincident_events
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ The resulting file would contain triggers from the full set of input files
import numpy
import argparse
import logging
import h5py

import pycbc
from pycbc.io import HFile
import pycbc.version

def com(f, files, group):
Expand Down Expand Up @@ -64,10 +64,10 @@ args = parser.parse_args()

pycbc.init_logging(args.verbose)

files = [h5py.File(n) for n in args.statmap_files]
files = [HFile(n) for n in args.statmap_files]

# Start setting some of the attributes
f = h5py.File(args.output_file, "w")
f = HFile(args.output_file, "w")
# It's not guaranteed that all files will follow this, so be careful later!
f.attrs['detector_1'] = files[0].attrs['detector_1']
f.attrs['detector_2'] = files[0].attrs['detector_2']
Expand Down
6 changes: 3 additions & 3 deletions bin/all_sky_search/pycbc_combine_statmap
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ combinations at the time of coincidence. This clusters to find the most
significant foreground, but leaves the background triggers alone.
"""

import h5py, numpy, argparse, logging, pycbc, pycbc.events, pycbc.io, lal
import numpy, argparse, logging, pycbc, pycbc.events, pycbc.io, lal
import pycbc.version
from ligo import segments

Expand All @@ -24,9 +24,9 @@ args = parser.parse_args()

pycbc.init_logging(args.verbose)

files = [h5py.File(n, 'r') for n in args.statmap_files]
files = [pycbc.io.HFile(n, 'r') for n in args.statmap_files]

f = h5py.File(args.output_file, "w")
f = pycbc.io.HFile(args.output_file, "w")

logging.info('Copying segments and attributes to %s' % args.output_file)
# Move segments information into the final file - remove some duplication
Expand Down
7 changes: 4 additions & 3 deletions bin/all_sky_search/pycbc_cut_merge_triggers_to_tmpltbank
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import numpy
import h5py
import pycbc
import pycbc.version
from pycbc.io import HFile

parser = argparse.ArgumentParser(description=__doc__)
pycbc.add_common_pycbc_options(parser)
Expand All @@ -50,7 +51,7 @@ parser.add_argument("--filter-func-file", required=True,
opt = parser.parse_args()
pycbc.init_logging(opt.verbose)

bank_fd = h5py.File(opt.full_template_bank, 'r')
bank_fd = HFile(opt.full_template_bank, 'r')

modl = imp.load_source('filter_func', opt.filter_func_file)
func = modl.filter_tmpltbank
Expand All @@ -69,11 +70,11 @@ copy_params = ['bank_chisq', 'bank_chisq_dof', 'chisq', 'chisq_dof',
'coa_phase', 'cont_chisq', 'cont_chisq_dof','end_time',
'sg_chisq', 'sigmasq', 'snr', 'template_duration']

ifd = h5py.File(opt.input_file, 'r')
ifd = HFile(opt.input_file, 'r')
ifos = list(ifd.keys())
assert(len(ifos) == 1)
ifo = ifos[0]
ofd = h5py.File(opt.output_file, 'w')
ofd = HFile(opt.output_file, 'w')
ofd.create_group(ifo)
new_boundaries = []
old_boundaries = []
Expand Down
6 changes: 3 additions & 3 deletions bin/all_sky_search/pycbc_distribute_background_bins
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/env python
import h5py, argparse, numpy, pycbc.events, logging, pycbc.events, pycbc.io
import argparse, numpy, pycbc.events, logging, pycbc.events, pycbc.io
import pycbc.version

parser = argparse.ArgumentParser()
Expand Down Expand Up @@ -31,7 +31,7 @@ pycbc.init_logging(args.verbose)
if len(args.output_files) != len(args.background_bins):
raise ValueError("Number of mass bins and output files does not match")

f = h5py.File(args.bank_file)
f = pycbc.io.HFile(args.bank_file)
data = {'mass1':f['mass1'][:], 'mass2':f['mass2'][:],
'spin1z':f['spin1z'][:], 'spin2z':f['spin2z'][:]}
if args.f_lower:
Expand All @@ -48,5 +48,5 @@ for name, outname in zip(names, args.output_files):
e = d.select(numpy.in1d(d.template_id, locs))
logging.info('%s coincs in mass bin: %s' % (len(e), name))
e.save(outname)
f = h5py.File(outname)
f = pycbc.io.HFile(outname)
f.attrs['name'] = name
Loading

0 comments on commit 0920d11

Please sign in to comment.