From 44c24b419ba35e2eb6d93bb93b47cd7578759f56 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 18 Sep 2023 14:03:41 -0600 Subject: [PATCH 1/2] Add test for warning message --- tests/cli/test_cli_collect.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/cli/test_cli_collect.py b/tests/cli/test_cli_collect.py index 2601eda6..9dc877c6 100644 --- a/tests/cli/test_cli_collect.py +++ b/tests/cli/test_cli_collect.py @@ -86,7 +86,7 @@ def test_collect_other_inputs( assert out_file not in files pattern = (tmp_path / pattern).as_posix() - with pytest.warns(gapsWarning): + with pytest.warns(gapsWarning) as warning_info: collect( out_file, pattern, @@ -94,6 +94,12 @@ def test_collect_other_inputs( datasets=["cf_profile", "dne_dataset"], ) + expected_message = ( + "Could not find the following datasets in the output files" + ) + assert expected_message in warning_info[0].message.args[0] + assert "dne_dataset" in warning_info[0].message.args[0] + files = list(tmp_path.glob("*")) assert tmp_path / "chunk_files" in files assert out_file in files From 6d33d77b7872fc49e5b58094d02e530f78ebc836 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Mon, 18 Sep 2023 14:04:01 -0600 Subject: [PATCH 2/2] Add `clobber` to collect CLI --- gaps/cli/collect.py | 14 ++++++++++++-- tests/cli/test_cli_collect.py | 26 ++++++++++++++++---------- 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/gaps/cli/collect.py b/gaps/cli/collect.py index 19a2b3e5..7165fdce 100644 --- a/gaps/cli/collect.py +++ b/gaps/cli/collect.py @@ -14,7 +14,12 @@ def collect( - _out_path, _pattern, project_points=None, datasets=None, purge_chunks=False + _out_path, + _pattern, + project_points=None, + datasets=None, + purge_chunks=False, + clobber=True, ): """Run collection on local worker. @@ -44,6 +49,11 @@ def collect( input files will **not** be removed if any of the datasets they contain have not been collected, regardless of the value of this input. By default, ``False``. + clobber : bool, optional + Flag to purge all collection output HDF5 files prior to running + the collection step if they exist on disk. This helps avoid any + surprising data byproducts when re-running the collection step + in a project directory. By default, ``True``. Returns ------- @@ -62,7 +72,7 @@ def collect( ) datasets = _find_datasets(datasets, _pattern) - collector = Collector(_out_path, _pattern, project_points) + collector = Collector(_out_path, _pattern, project_points, clobber=clobber) for dataset_name in datasets: logger.debug("Collecting %r...", dataset_name) collector.collect(dataset_name) diff --git a/tests/cli/test_cli_collect.py b/tests/cli/test_cli_collect.py index 9dc877c6..efeb9a7c 100644 --- a/tests/cli/test_cli_collect.py +++ b/tests/cli/test_cli_collect.py @@ -36,22 +36,28 @@ def test_collect( profiles = manual_collect(collect_dir / pattern, "cf_profile") assert not list(tmp_path.glob("*")) - for h5_file in collect_dir.glob(pattern): + for file_ind, h5_file in enumerate(collect_dir.glob(pattern)): shutil.copy(h5_file, tmp_path / h5_file.name) + if file_ind == 0: + shutil.copy(h5_file, out_file) files = list(tmp_path.glob("*")) - assert len(files) == 4 + assert len(files) == 5 assert tmp_path / "chunk_files" not in files - assert out_file not in files + assert out_file in files pattern = (tmp_path / pattern).as_posix() - collect( - out_file, - pattern, - project_points=points_path, - datasets=datasets, - purge_chunks=True, - ) + with pytest.warns(gapsWarning) as warning_info: + collect( + out_file, + pattern, + project_points=points_path, + datasets=datasets, + purge_chunks=True, + ) + + expected_message = "already exists and is being replaced" + assert expected_message in warning_info[0].message.args[0] files = list(tmp_path.glob("*")) assert tmp_path / "chunk_files" not in files