Skip to content

Commit

Permalink
893 Add protection for no input files
Browse files Browse the repository at this point in the history
#893

[author: gonzaponte]

Currently, there is a protection against not specifying
files_in. However, if we do set this variable, but it does not
correspond to a valid file or files through globbing, the city will
silently finish causing confusion. This PR fixes that.

[reviewer: jwaiton]

This change adds protections for the case in which malformed input files are provided to the city, which previously caused the city to stop silently with no information provided. Good job!
  • Loading branch information
jwaiton committed Oct 8, 2024
2 parents 8e6ac51 + 5fc13d1 commit d25eb48
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 11 deletions.
15 changes: 10 additions & 5 deletions invisible_cities/cities/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,18 @@ def proxy(**kwds):
if isinstance(conf.files_in, str):
conf.files_in = [conf.files_in]

globbed_files = map(glob, map(expandvars, conf.files_in))
globbed_files = list(f for fs in globbed_files for f in fs)
if len(set(globbed_files)) != len(globbed_files):
input_files = []
for pattern in map(expandvars, conf.files_in):
globbed_files = glob(pattern)
if len(globbed_files) == 0:
raise FileNotFoundError(f"Input pattern {pattern} did not match any files.")
input_files.extend(globbed_files)

if len(set(input_files)) != len(input_files):
warnings.warn("files_in contains repeated values. Ignoring duplicate files.", UserWarning)
globbed_files = [f for i, f in enumerate(globbed_files) if f not in globbed_files[:i]]
input_files = [f for i, f in enumerate(input_files) if f not in input_files[:i]]

conf.files_in = globbed_files
conf.files_in = input_files
conf.file_out = expandvars(conf.file_out)

conf.event_range = event_range(conf)
Expand Down
17 changes: 17 additions & 0 deletions invisible_cities/cities/components_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from .. core.configure import configure
from .. core.exceptions import InvalidInputFileStructure
from .. core.exceptions import SensorIDMismatch
from .. core.exceptions import NoInputFiles
from .. core.testing_utils import assert_tables_equality
from .. core import system_of_units as units
from .. types.symbols import WfType
Expand Down Expand Up @@ -183,6 +184,22 @@ def dummy_city( files_in : Union[str, list]
assert result == files_in


def test_city_fails_if_bad_input_file(config_tmpdir, ICDATADIR):
file_ok = os.path.join(ICDATADIR, "electrons_40keV_z25_RWF.h5") # any file will do
file_bad = "/this/file/does/not/exist.h5"
files_in = [file_ok, file_bad]
file_out = os.path.join(config_tmpdir, "test_city_fails_if_bad_input_file.h5")

@city
def dummy_city( files_in : Union[str, list]
, file_out : str
, event_range : tuple):
pass

with raises(FileNotFoundError):
dummy_city(files_in=files_in, file_out=file_out, event_range=(0, 1))


def test_compute_xy_position_depends_on_actual_run_number():
"""
The channels entering the reco algorithm are the ones in a square of 3x3
Expand Down
2 changes: 1 addition & 1 deletion invisible_cities/config/beersheba.conf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
files_in = '$ICDIR/database/test_data/Tl208_NEW_v1_03_01_nexus_v5_03_04_cut144.tracks_10000.root.h5'
files_in = '$ICDIR/database/test_data/228Th_10evt_hits.h5'
file_out = '$ICDIR/database/test_data/test_beersheba.h5'
compression = 'ZLIB4'
event_range=10
Expand Down
2 changes: 1 addition & 1 deletion invisible_cities/config/isaura.conf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
files_in = '$ICDIR/database/test_data/test_beersheba.h5'
files_in = '$ICDIR/database/test_data/exact_Kr_deconvolution_with_MC.h5'
file_out = '/tmp/decoTopology.h5'
compression = 'ZLIB4'
event_range=1000
Expand Down
9 changes: 5 additions & 4 deletions invisible_cities/io/indexation_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,11 @@ def _df_writer(h5out):
( pmap_writer, "PMAPS", "S2" , "event", "s2" ),
( pmap_writer, "PMAPS", "S2Si" , "event", "s2si"),
( _df_writer, "DUMMY", "dummy" , "event", 'df' )])
def test_table_is_indexed(tmpdir_factory, writer, group, node, column, thing):
tmpdir = tmpdir_factory.mktemp('indexation')
file_out = os.path.join(tmpdir, f"empty_table_containing_{thing}.h5")
writer_test_city(writer=writer, file_out=file_out, files_in='dummy', detector_db = 'new')
def test_table_is_indexed(tmpdir_factory, ICDATADIR, writer, group, node, column, thing):
tmpdir = tmpdir_factory.mktemp('indexation')
file_out = os.path.join(tmpdir, f"empty_table_containing_{thing}.h5")
dummy_files = os.path.join(ICDATADIR, "*.h5")
writer_test_city(writer=writer, file_out=file_out, files_in=dummy_files, detector_db = 'new')
with tb.open_file(file_out, 'r') as h5out:
table = getattr(getattr(h5out.root, group), node)
assert getattr(table.cols, column).is_indexed

0 comments on commit d25eb48

Please sign in to comment.