Merge pull request #25 from NREL/pp/collect_clobber
Collect clobber by default
ppinchuk committed Sep 18, 2023
2 parents 66b8aae + 6d33d77 commit 0c6a857
Showing 2 changed files with 35 additions and 13 deletions.
gaps/cli/collect.py (12 additions, 2 deletions)
@@ -14,7 +14,12 @@
 
 
 def collect(
-    _out_path, _pattern, project_points=None, datasets=None, purge_chunks=False
+    _out_path,
+    _pattern,
+    project_points=None,
+    datasets=None,
+    purge_chunks=False,
+    clobber=True,
 ):
     """Run collection on local worker.
@@ -44,6 +49,11 @@ def collect(
         input files will **not** be removed if any of the datasets they
         contain have not been collected, regardless of the value of this
         input. By default, ``False``.
+    clobber : bool, optional
+        Flag to purge all collection output HDF5 files prior to running
+        the collection step if they exist on disk. This helps avoid any
+        surprising data byproducts when re-running the collection step
+        in a project directory. By default, ``True``.
 
     Returns
     -------
@@ -62,7 +72,7 @@ def collect(
     )
 
     datasets = _find_datasets(datasets, _pattern)
-    collector = Collector(_out_path, _pattern, project_points)
+    collector = Collector(_out_path, _pattern, project_points, clobber=clobber)
     for dataset_name in datasets:
         logger.debug("Collecting %r...", dataset_name)
         collector.collect(dataset_name)
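
As an aside (not part of the diff itself): a minimal sketch of calling the changed function directly from Python with the new keyword. The import path mirrors the file changed above; the paths and dataset name are placeholders, not values from this project.

    from gaps.cli.collect import collect

    # Hypothetical project layout; replace the paths with real ones.
    collect(
        "./outputs/collected.h5",          # _out_path: final collected HDF5 file
        "./outputs/collected_chunk_*.h5",  # _pattern: glob matching the chunked files
        project_points="./project_points.csv",
        datasets=["cf_profile"],
        purge_chunks=False,
        clobber=False,  # keep an existing collected file instead of purging it first
    )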
tests/cli/test_cli_collect.py (23 additions, 11 deletions)
@@ -36,22 +36,28 @@ def test_collect(
     profiles = manual_collect(collect_dir / pattern, "cf_profile")
 
     assert not list(tmp_path.glob("*"))
-    for h5_file in collect_dir.glob(pattern):
+    for file_ind, h5_file in enumerate(collect_dir.glob(pattern)):
         shutil.copy(h5_file, tmp_path / h5_file.name)
+        if file_ind == 0:
+            shutil.copy(h5_file, out_file)
 
     files = list(tmp_path.glob("*"))
-    assert len(files) == 4
+    assert len(files) == 5
     assert tmp_path / "chunk_files" not in files
-    assert out_file not in files
+    assert out_file in files
 
     pattern = (tmp_path / pattern).as_posix()
-    collect(
-        out_file,
-        pattern,
-        project_points=points_path,
-        datasets=datasets,
-        purge_chunks=True,
-    )
+    with pytest.warns(gapsWarning) as warning_info:
+        collect(
+            out_file,
+            pattern,
+            project_points=points_path,
+            datasets=datasets,
+            purge_chunks=True,
+        )
+
+    expected_message = "already exists and is being replaced"
+    assert expected_message in warning_info[0].message.args[0]
 
     files = list(tmp_path.glob("*"))
     assert tmp_path / "chunk_files" not in files
@@ -86,14 +92,20 @@ def test_collect_other_inputs(
     assert out_file not in files
 
     pattern = (tmp_path / pattern).as_posix()
-    with pytest.warns(gapsWarning):
+    with pytest.warns(gapsWarning) as warning_info:
         collect(
             out_file,
             pattern,
             project_points=points_path,
             datasets=["cf_profile", "dne_dataset"],
         )
+
+    expected_message = (
+        "Could not find the following datasets in the output files"
+    )
+    assert expected_message in warning_info[0].message.args[0]
+    assert "dne_dataset" in warning_info[0].message.args[0]
 
     files = list(tmp_path.glob("*"))
     assert tmp_path / "chunk_files" in files
     assert out_file in files
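
The updated tests show that collecting into a pre-existing output file now emits a gapsWarning saying the file "already exists and is being replaced". A caller that overwrites on purpose could silence just that warning with the standard library filters; a minimal sketch, using placeholder paths and filtering by message text so no warning-class import is needed:

    import warnings

    from gaps.cli.collect import collect

    with warnings.catch_warnings():
        # Ignore only the expected replacement warning; anything else still surfaces.
        warnings.filterwarnings(
            "ignore", message=".*already exists and is being replaced.*"
        )
        collect(
            "./outputs/collected.h5",          # placeholder: pre-existing output file
            "./outputs/collected_chunk_*.h5",  # placeholder: glob for chunked files
            project_points="./project_points.csv",
            datasets=["cf_profile"],
        )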
