From 77b77d3c63bd4f0c74560c1425e610ccb13ce8ac Mon Sep 17 00:00:00 2001 From: Johannes Bulin Date: Thu, 28 Nov 2024 14:41:21 +0000 Subject: [PATCH] Implemented DataHandler class and extract/remove/namelist subclasses (#6). --- ifsbench/data/__init__.py | 11 ++ ifsbench/data/datahandler.py | 49 ++++++ ifsbench/data/extracthandler.py | 53 ++++++ ifsbench/data/namelisthandler.py | 167 ++++++++++++++++++ ifsbench/data/renamehandler.py | 114 ++++++++++++ tests/data/test_extracthandler.py | 148 ++++++++++++++++ tests/data/test_namelisthandler.py | 273 +++++++++++++++++++++++++++++ tests/data/test_renamehandler.py | 97 ++++++++++ 8 files changed, 912 insertions(+) create mode 100644 ifsbench/data/__init__.py create mode 100644 ifsbench/data/datahandler.py create mode 100644 ifsbench/data/extracthandler.py create mode 100644 ifsbench/data/namelisthandler.py create mode 100644 ifsbench/data/renamehandler.py create mode 100644 tests/data/test_extracthandler.py create mode 100644 tests/data/test_namelisthandler.py create mode 100644 tests/data/test_renamehandler.py diff --git a/ifsbench/data/__init__.py b/ifsbench/data/__init__.py new file mode 100644 index 0000000..ef939b6 --- /dev/null +++ b/ifsbench/data/__init__.py @@ -0,0 +1,11 @@ +# (C) Copyright 2020- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +from .datahandler import * # noqa +from .extracthandler import * # noqa +from .namelisthandler import * # noqa +from .renamehandler import * # noqa diff --git a/ifsbench/data/datahandler.py b/ifsbench/data/datahandler.py new file mode 100644 index 0000000..a45fc25 --- /dev/null +++ b/ifsbench/data/datahandler.py @@ -0,0 +1,49 @@ +# (C) Copyright 2020- ECMWF. 
class DataHandler(ABC):
    """
    Base class for data pipeline steps.

    Each DataHandler object describes one step in the data pipeline. Multiple
    DataHandler objects can be executed sequentially to perform specific data
    setup tasks.
    """

    @abstractmethod
    def execute(self, wdir, **kwargs):
        """
        Run this data handling operation in a given directory.

        Parameters
        ----------
        wdir : str or :any:`pathlib.Path`
            The directory where the data handling should take place.
            Subclasses of DataHandler should operate relative to this path,
            unless absolute paths are given.
        **kwargs :
            Additional, handler-specific options.

        Raises
        ------
        NotImplementedError
            Always. This base implementation carries no behaviour; it can
            only be reached through an explicit ``super().execute(...)`` call.
        """
        # `NotImplemented` is the sentinel for binary special methods and must
        # not be returned from ordinary methods; raise instead so that a
        # stray super() call fails loudly.
        raise NotImplementedError(
            "DataHandler.execute must be implemented by subclasses."
        )

    # TODO: add abstract (de)serialisation hooks once the format is settled:
    #   * to_dict(self): convert the object to a dictionary whose "class" and
    #     "module" entries hold the class name and module name.
    #   * from_dict(cls, data): classmethod, the inverse of to_dict.
class ExtractHandler(DataHandler):
    """
    DataHandler that extracts a given archive to a specific directory.
    """

    def __init__(self, archive_path, target_dir=None):
        """
        Initialise the handler.

        Parameters
        ----------
        archive_path: str or `pathlib.Path`
            The path to the archive that will be extracted. If a relative path
            is given, this will be relative to the `wdir` argument in `execute`.

        target_dir: str, `pathlib.Path` or `None`
            The directory to where the archive will be unpacked. If a relative path
            is given, this will be relative to the `wdir` argument in `execute`.
            If None is given, the archive will be extracted to `wdir`.

        Raises
        ------
        TypeError
            If one of the arguments cannot be converted to a `pathlib.Path`.
        """
        self._archive_path = pathlib.Path(archive_path)
        self._target_dir = None if target_dir is None else pathlib.Path(target_dir)

    def execute(self, wdir, **kwargs):
        """
        Unpack the archive.

        Parameters
        ----------
        wdir : str or `pathlib.Path`
            The working directory. Relative archive and target paths are
            interpreted relative to this directory.
        """
        # Accept plain strings as documented by DataHandler.execute.
        wdir = pathlib.Path(wdir)

        # Joining with an absolute path discards the left-hand side, so this
        # handles both relative and absolute paths.
        archive_path = wdir/self._archive_path

        if self._target_dir is None:
            target_dir = wdir
        else:
            target_dir = wdir/self._target_dir

        debug(f"Unpack archive {archive_path} to {target_dir}.")
        # NOTE(review): unpack_archive does not protect against malicious
        # archive members (e.g. paths escaping target_dir); only use this
        # with trusted archives.
        shutil.unpack_archive(archive_path, target_dir)
class NamelistOverride:
    """
    Specify changes that will be applied to a namelist.
    """

    class NamelistOperation(Enum):
        """
        The kind of modification that a NamelistOverride performs.
        """
        SET = auto()
        APPEND = auto()
        DELETE = auto()

    def __init__(self, key, mode, value=None):
        """
        Parameters
        ----------
        key: str or iterable of str
            The namelist entry that will be modified. Can be either a string
            where '/' separates the namelist name and the entry key or an iterable
            of strings of length two.

        mode: NamelistOverride.NamelistOperation
            What kind of operation is specified. Can be
                * Set a certain entry.
                * Append to an array entry.
                * Delete an entry.

        value:
            The value that is set (SET operation) or appended (APPEND).
            Ignored for DELETE.

        Raises
        ------
        ValueError
            If the key does not consist of exactly two parts, if `mode` is
            not a NamelistOperation or if no value is given for SET/APPEND.
        """
        # Always store the key as a tuple, no matter how it was specified.
        if isinstance(key, str):
            keys = tuple(key.split('/'))
        else:
            keys = tuple(key)

        if len(keys) != 2:
            raise ValueError("The key object must be of length two.")

        # An unknown mode would otherwise turn apply() into a silent no-op.
        if not isinstance(mode, self.NamelistOperation):
            raise ValueError("The mode must be a NamelistOverride.NamelistOperation!")

        needs_value = mode in (self.NamelistOperation.SET, self.NamelistOperation.APPEND)
        if value is None and needs_value:
            raise ValueError("The new value must not be None!")

        self._keys = keys
        self._mode = mode
        self._value = value

    def apply(self, namelist):
        """
        Apply the stored changes to a namelist.

        Parameters
        ----------
        namelist: f90nml.Namelist
            The namelist to which the changes are applied. It is modified
            in place.

        Raises
        ------
        ValueError
            If an APPEND targets an entry that is not an array, or if the
            appended value has a different type than the existing array
            entries.
        """
        group_name, key = self._keys
        operation = self.NamelistOperation

        if group_name not in namelist:
            # Nothing to delete in a group that doesn't exist.
            if self._mode == operation.DELETE:
                return

            namelist[group_name] = {}

        group = namelist[group_name]

        if self._mode == operation.SET:
            debug(f"Set namelist entry {self._keys} = {self._value}.")
            group[key] = self._value

        elif self._mode == operation.APPEND:
            if key not in group:
                group[key] = []

            entry = group[key]

            if not hasattr(entry, 'append'):
                raise ValueError("Values can only be appended to arrays!")

            # f90nml doesn't seem to do any kind of checking, so we could
            # create arrays in the namelist where the entries have different
            # types.
            # This will most likely cause issues, so we verify here, that
            # the array entries have the same type. An exact type comparison
            # is used on purpose (e.g. bool must not pass as int).
            if len(entry) > 0 and type(entry[0]) is not type(self._value):
                raise ValueError("The given value must have the same type as existing array entries!")

            debug(f"Append {self._value} to namelist entry {self._keys}.")
            entry.append(self._value)

        elif self._mode == operation.DELETE:
            if key in group:
                debug(f"Delete namelist entry {self._keys}.")
                del group[key]
class NamelistHandler(DataHandler):
    """
    DataHandler specialisation that can modify Fortran namelists.
    """

    def __init__(self, input_path, output_path, overrides):
        """
        Initialise the handler.

        Parameters
        ----------
        input_path: str or `pathlib.Path`
            The path to the namelist that will be modified. If a relative path
            is given, this will be relative to the `wdir` argument in `execute`.

        output_path: str or `pathlib.Path`
            The path to which the updated namelist will be written. If a relative path
            is given, this will be relative to the `wdir` argument in `execute`.

        overrides: iterable of NamelistOverride
            The NamelistOverrides that will be applied.

        Raises
        ------
        TypeError
            If a path cannot be converted to a `pathlib.Path` or `overrides`
            is not iterable.
        ValueError
            If `overrides` contains anything but NamelistOverride objects.
        """
        self._input_path = pathlib.Path(input_path)
        self._output_path = pathlib.Path(output_path)

        self._overrides = list(overrides)
        for override in self._overrides:
            if not isinstance(override, NamelistOverride):
                raise ValueError("Namelist overrides must be NamelistOverride objects!")

    def execute(self, wdir, **kwargs):
        """
        Read the input namelist, apply all overrides in order and write the
        result to the output path.

        If the input namelist doesn't exist, nothing is written.

        Parameters
        ----------
        wdir : str or `pathlib.Path`
            The working directory. Relative input and output paths are
            interpreted relative to this directory.
        """
        # Accept plain strings as documented by DataHandler.execute.
        wdir = pathlib.Path(wdir)

        # Joining with an absolute path discards the left-hand side, so this
        # handles both relative and absolute paths.
        input_path = wdir/self._input_path
        output_path = wdir/self._output_path

        # Do nothing if the input namelist doesn't exist.
        if not input_path.exists():
            info(f"Namelist {input_path} doesn't exist.")
            return

        debug(f"Modify namelist {input_path}.")
        namelist = f90nml.read(input_path)

        for override in self._overrides:
            override.apply(namelist)

        # The output may live in a directory that doesn't exist yet.
        output_path.parent.mkdir(parents=True, exist_ok=True)
        namelist.write(output_path, force=True)
class RenameHandler(DataHandler):
    """
    DataHandler specialisation that can move/rename files by using regular
    expressions (as in re.sub).
    """

    class RenameMode(Enum):
        """
        Enumeration of available rename operations.

        Attributes
        ----------
        COPY :
            Copy the file from its current place to the new location.
        SYMLINK :
            Create a symlink in the new location, pointing to its current
            location.
        MOVE :
            Move the file from its current place to the new location.
        """
        COPY = auto()
        SYMLINK = auto()
        MOVE = auto()

    def __init__(self, pattern, repl, mode=RenameMode.SYMLINK):
        """
        Initialise the handler.

        Parameters
        ----------
        pattern: str
            The pattern that will be replaced. Corresponds to `pattern` in
            `re.sub`.

        repl: str
            The replacement pattern. Corresponds to `repl` in `re.sub`.

        mode: `RenameHandler.RenameMode`
            Specifies how the renaming is done (copy, move, symlink).

        Raises
        ------
        ValueError
            If `mode` is not a `RenameHandler.RenameMode`.
        """
        # An unknown mode would otherwise delete existing destinations in
        # `execute` without putting anything in their place.
        if not isinstance(mode, self.RenameMode):
            raise ValueError("The mode must be a RenameHandler.RenameMode!")

        self._pattern = str(pattern)
        self._repl = str(repl)
        self._mode = mode

    def execute(self, wdir, **kwargs):
        """
        Rename all files below `wdir` whose path (relative to `wdir`) is
        changed by the substitution.

        Existing files or directories at a destination are deleted first.
        All consistency checks are done before the first file is touched.

        Parameters
        ----------
        wdir : str or `pathlib.Path`
            The working directory whose files are renamed.

        Raises
        ------
        RuntimeError
            If two files would be renamed to the same path or if a
            destination is itself a file that will be renamed.
        """
        # Resolve the working directory once, so that sources and (resolved)
        # destinations are compared in the same canonical form. Otherwise a
        # symlinked wdir makes every file look as if it was renamed.
        wdir = Path(wdir).resolve()

        # We create a dictionary first, that stores the paths that will be
        # modified.
        path_mapping = {}

        for f in wdir.rglob('*'):
            if f.is_dir():
                continue

            old_name = str(f.relative_to(wdir))
            new_name = re.sub(self._pattern, self._repl, old_name)

            # Files whose name isn't touched by the pattern are never renamed
            # (not even if they are symlinks that resolve somewhere else).
            if new_name == old_name:
                continue

            dest = (wdir/new_name).resolve()

            if f != dest:
                path_mapping[f] = dest

        # Check that we don't end up with two initial files being renamed to
        # the same file. Crash if this is the case.
        if len(set(path_mapping.values())) != len(path_mapping):
            raise RuntimeError("Renaming would cause two different files to be given the same name!")

        # Crash if we are renaming one of the files to a path that is also
        # the "source" for another renaming. This is checked up front, so
        # that a failure doesn't leave the directory half-renamed.
        for source, dest in path_mapping.items():
            if dest in path_mapping:
                raise RuntimeError(f"Can't move {source} to {dest} as there is a cyclical dependency!")

        for source, dest in path_mapping.items():
            # Delete whatever resides at dest at the moment (whether it's a
            # file or a directory). Symlinks are unlinked, never followed.
            if dest.exists() or dest.is_symlink():
                debug(f"Delete existing file/directory {dest} before renaming.")
                if dest.is_dir() and not dest.is_symlink():
                    shutil.rmtree(dest)
                else:
                    dest.unlink()

            dest.parent.mkdir(parents=True, exist_ok=True)

            if self._mode == self.RenameMode.COPY:
                debug(f"Copy {source} to {dest}.")

                shutil.copy(source, dest)
            elif self._mode == self.RenameMode.SYMLINK:
                debug(f"Symlink {source} to {dest}.")

                dest.symlink_to(source)
            elif self._mode == self.RenameMode.MOVE:
                debug(f"Move {source} to {dest}.")

                source.rename(dest)
@pytest.mark.parametrize('archive_path,archive_valid', [
    (Path('somewhere/archive.tar'), True),
    ('somewhere/archive.tar', True),
    (None, False),
    (2, False)
])
@pytest.mark.parametrize('target_dir, target_valid', [
    (Path('somewhere/archive.tar'), True),
    ('somewhere/archive.tar', True),
    (None, True),
    (2, False)
])
def test_extracthandler_init(archive_path, archive_valid, target_dir, target_valid):
    """
    Initialise the ExtractHandler and make sure that only correct values are accepted.
    """
    if archive_valid and target_valid:
        context = nullcontext()
    else:
        context = pytest.raises(Exception)

    with context:
        ExtractHandler(archive_path, target_dir)


@pytest.fixture(name='archive')
def fixture_archive():
    """
    Relative paths of the (empty) files that are placed inside the test archive.
    """
    paths = [
        'data1/file1.txt',
        'data1/file2.txt',
        'data2/file1.txt',
        'data2/file2.txt',
    ]

    return paths


@pytest.mark.parametrize('archive_path', [
    Path('somewhere/archive'),
    'somewhere/archive',
])
@pytest.mark.parametrize('archive_relative', [True, False])
@pytest.mark.parametrize('archive_type', ['zip', 'tar', 'gztar'])
@pytest.mark.parametrize('target_dir', [
    Path('somewhere/extract'),
    'somewhere/extract',
    None,
])
@pytest.mark.parametrize('target_relative', [True, False])
def test_extracthandler_execute(tmp_path, archive, archive_path, archive_relative,
                                archive_type, target_dir, target_relative):
    """
    Test that the execute function moves the content of an archive to the right
    directory.

    Parameters
    ----------
    tmp_path: `pathlib.Path`
        pytest-provided temporary directory which acts as our working directory.

    archive:
        Directory structure inside the archive.

    archive_path:
        Relative path (to tmp_path) where the archive resides, WITHOUT the
        archive suffix.

    archive_relative:
        Whether archive_path is built as a relative or absolute path before
        the archive is created.

    archive_type:
        Which kind of archive is used (see `shutil.make_archive`).

    target_dir:
        Relative path (to tmp_path) where the data will be extracted to.

    target_relative:
        Whether target_dir will be passed to the ExtractHandler as a relative
        or absolute path.

    """
    # Build the paths that are passed to the ExtractHandler. If the paths
    # are supposed to be absolute, use tmp_path to build an absolute path.
    # Also distinguish between str and Path (ExtractHandler should support
    # both).
    if not archive_relative:
        if isinstance(archive_path, str):
            archive_path = str((tmp_path/archive_path).resolve())
        else:
            archive_path = (tmp_path/archive_path).resolve()

    if not target_relative and target_dir is not None:
        # The str/Path distinction must be based on target_dir itself, not on
        # archive_path, otherwise the type of target_dir is silently changed.
        if isinstance(target_dir, str):
            target_dir = str((tmp_path/target_dir).resolve())
        else:
            target_dir = (tmp_path/target_dir).resolve()

    # Build the archive that we will unpack by using pack_path as a directory
    # that we will compress. Simply touch each file in the archive fixture.
    pack_path = tmp_path/'pack'
    for path in archive:
        (pack_path/path).parent.mkdir(parents=True, exist_ok=True)
        (pack_path/path).touch()

    # make_archive returns the name of the created archive (including the
    # suffix), which is what the handler has to be given.
    if Path(archive_path).is_absolute():
        archive_path = shutil.make_archive(archive_path, archive_type, pack_path)
    else:
        archive_path = shutil.make_archive(tmp_path/archive_path, archive_type, pack_path)

    # Actually extract the archive.
    handler = ExtractHandler(archive_path, target_dir)
    handler.execute(tmp_path)

    # Build the path where the data should now be. As target_dir may be
    # a Path, str or None - and absolute or relative - we have to determine
    # the actual path first.
    if target_dir is None:
        extract_path = tmp_path
    else:
        extract_path = Path(target_dir)

        if not extract_path.is_absolute():
            extract_path = tmp_path/extract_path

    for path in archive:
        assert (extract_path/path).exists()
@pytest.fixture(name='initial_namelist')
def fixture_namelist():
    """
    A small namelist with two groups that the tests start from.
    """
    namelist = Namelist()

    namelist['namelist1'] = {
        'int': 2,
        'str': 'test',
        'list': [2, 3, 'entry']
    }

    namelist['namelist2'] = {'int': 5}

    return namelist


# NOTE: the following four tests exercise NamelistOverride. Their
# `test_extracthandler_` prefix is historical and kept so that existing test
# selections keep working.

@pytest.mark.parametrize('key,mode,value,success', [
    ('namelist1', NamelistOverride.NamelistOperation.APPEND, None, False),
    ('namelist1', NamelistOverride.NamelistOperation.SET, None, False),
    ('namelist1', NamelistOverride.NamelistOperation.DELETE, None, False),
    ('namelist1/entry', NamelistOverride.NamelistOperation.DELETE, None, True),
    ('namelist1/entry', NamelistOverride.NamelistOperation.SET, None, False),
    ('namelist1/entry', NamelistOverride.NamelistOperation.APPEND, None, False),
    ('namelist1/entry', NamelistOverride.NamelistOperation.SET, 2, True),
    ('namelist1/entry', NamelistOverride.NamelistOperation.APPEND, 3, True),
    (('namelist1', 'entry'), NamelistOverride.NamelistOperation.SET, 2, True),
    (('namelist1', 'entry'), NamelistOverride.NamelistOperation.APPEND, 3, True),
])
def test_extracthandler_init(key, mode, value, success):
    """
    Initialise the NamelistOverride and make sure that only correct values are
    accepted.
    """

    if success:
        context = nullcontext()
    else:
        context = pytest.raises(ValueError)

    with context:
        NamelistOverride(key, mode, value)


@pytest.mark.parametrize('key,value', [
    (('namelist1', 'int'), 5),
    (('namelist1', 'list'), [0, 2]),
    (('namelist2', 'int'), 'not an int'),
    (('namelist2', 'newvalue'), 5),
    (('namelist3', 'anothervalue'), [2, 3, 4]),
])
def test_extracthandler_apply_set(initial_namelist, key, value):
    """
    Apply a SET override and make sure that exactly the requested entry is
    set while all other entries stay untouched.
    """

    namelist = Namelist(initial_namelist)

    override = NamelistOverride(key, NamelistOverride.NamelistOperation.SET, value)

    override.apply(namelist)

    assert namelist[key[0]][key[1]] == value

    for name, entry in namelist.items():
        for name2 in entry.keys():
            if (name, name2) != key:
                assert entry[name2] == initial_namelist[name][name2]


@pytest.mark.parametrize('key,value,success', [
    (('namelist1', 'int'), 5, False),
    (('namelist1', 'list'), 3, True),
    (('namelist1', 'list'), [2, 4], False),
    (('namelist1', 'list'), 5, True),
    (('namelist1', 'list'), 'Hello', False),
    (('namelist2', 'int'), 'not an int', False),
    (('namelist3', 'new_list'), 'not an int', True)
])
def test_extracthandler_apply_append(initial_namelist, key, value, success):
    """
    Apply an APPEND override. Appending must fail for non-array entries and
    for values whose type differs from the existing array entries. On success
    only the requested entry may change.
    """

    namelist = Namelist(initial_namelist)

    override = NamelistOverride(key, NamelistOverride.NamelistOperation.APPEND, value)

    if success:
        override.apply(namelist)
    else:
        with pytest.raises(ValueError):
            override.apply(namelist)
        return

    if key[0] in initial_namelist and key[1] in initial_namelist[key[0]]:
        assert namelist[key[0]][key[1]] == initial_namelist[key[0]][key[1]] + [value]
    else:
        assert namelist[key[0]][key[1]] == [value]

    for name, entry in namelist.items():
        for name2 in entry.keys():
            if (name, name2) != key:
                assert entry[name2] == initial_namelist[name][name2]


@pytest.mark.parametrize('key', [
    ('namelist1', 'int'),
    ('namelist1', 'list'),
    ('namelist1', 'list'),
    ('namelist2', 'int'),
    ('doesnot', 'exist'),
    ('namelist1', 'missing'),
])
def test_extracthandler_apply_delete(initial_namelist, key):
    """
    Apply a DELETE override and make sure that the requested entry is gone
    while all other entries stay untouched. Deleting a missing entry must not
    fail.
    """

    namelist = Namelist(initial_namelist)

    override = NamelistOverride(key, NamelistOverride.NamelistOperation.DELETE)

    override.apply(namelist)

    for name, entry in initial_namelist.items():
        for name2 in entry.keys():
            if (name, name2) == key:
                if name in namelist:
                    assert name2 not in namelist[name]
            else:
                assert namelist[name][name2] == initial_namelist[name][name2]


@pytest.mark.parametrize('input_path,input_valid', [
    (Path('somewhere/fort.4'), True),
    ('somewhere/namelist', True),
    (None, False),
    (2, False)
])
@pytest.mark.parametrize('output_path,output_valid', [
    (Path('somewhere/new_fort.4'), True),
    ('somewhere/namelist', True),
    (None, False),
    (2, False)
])
@pytest.mark.parametrize('overrides, overrides_valid', [
    ([], True),
    ('Test', False),
    (2, False),
    ([NamelistOverride('namelist/entry', NamelistOverride.NamelistOperation.SET, 5)], True),
    ([
        NamelistOverride('namelist/entry', NamelistOverride.NamelistOperation.SET, 5),
        NamelistOverride('namelist/entry2', NamelistOverride.NamelistOperation.APPEND, 2),
        NamelistOverride('namelist/entry', NamelistOverride.NamelistOperation.DELETE),
    ], True),
])
def test_namelisthandler_init(input_path, input_valid, output_path, output_valid, overrides, overrides_valid):
    """
    Initialise the NamelistHandler and make sure that only correct values are accepted.
    """
    if input_valid and output_valid and overrides_valid:
        context = nullcontext()
    else:
        context = pytest.raises(Exception)

    with context:
        NamelistHandler(input_path, output_path, overrides)


@pytest.mark.parametrize('input_path', [
    Path('somewhere/fort.4'),
    'somewhere/namelist'
])
@pytest.mark.parametrize('input_relative', [True, False])
@pytest.mark.parametrize('output_path', [
    Path('somewhere_else/new_fort.4'),
    'somewhere/namelist',
])
@pytest.mark.parametrize('output_relative', [True, False])
@pytest.mark.parametrize('overrides', [
    [],
    [NamelistOverride('namelist/entry', NamelistOverride.NamelistOperation.SET, 5)],
    [
        NamelistOverride('namelist/entry', NamelistOverride.NamelistOperation.SET, 5),
        NamelistOverride('namelist/entry2', NamelistOverride.NamelistOperation.APPEND, 2),
        NamelistOverride('namelist/entry', NamelistOverride.NamelistOperation.DELETE),
    ],
])
def test_namelisthandler_execute(tmp_path, initial_namelist, input_path,
                                 input_relative, output_path, output_relative,
                                 overrides):
    """
    Test that the execute function modifies the namelists correctly.

    Parameters
    ----------
    tmp_path: `pathlib.Path`
        pytest-provided temporary directory which acts as our working directory.

    initial_namelist:
        The namelist that is written to the input path before the handler
        is executed.

    input_path:
        Relative path (to tmp_path) where the input namelist resides.

    input_relative:
        Whether input_path will be passed to the NamelistHandler as a relative
        or absolute path.

    output_path:
        Relative path (to tmp_path) to the output namelist.

    output_relative:
        Whether output_path will be passed to the NamelistHandler as a relative
        or absolute path.

    overrides:
        The overrides that are applied.

    """
    # Build the paths that are passed to the NamelistHandler. If the paths
    # are supposed to be absolute, use tmp_path to build an absolute path.
    # Also distinguish between str and Path (NamelistHandler should support
    # both).
    if not input_relative:
        if isinstance(input_path, str):
            input_path = str((tmp_path/input_path).resolve())
        else:
            input_path = (tmp_path/input_path).resolve()

    if not output_relative:
        if isinstance(output_path, str):
            output_path = str((tmp_path/output_path).resolve())
        else:
            output_path = (tmp_path/output_path).resolve()

    # Create the initial namelist at the INPUT path. Joining with tmp_path
    # works for relative and absolute paths alike.
    abs_input_path = tmp_path/input_path
    abs_output_path = tmp_path/output_path

    abs_input_path.parent.mkdir(parents=True, exist_ok=True)
    initial_namelist.write(abs_input_path, force=True)

    # Make sure that the output directory exists, so that this test doesn't
    # rely on the handler creating it.
    abs_output_path.parent.mkdir(parents=True, exist_ok=True)

    # Actually modify the namelist.
    handler = NamelistHandler(input_path, output_path, overrides)
    handler.execute(tmp_path)

    assert abs_output_path.exists()
@pytest.mark.parametrize('pattern,repl,mode,files_in,files_out', [
    # The pattern doesn't match anything: nothing must change.
    (r'file', r'data', RenameHandler.RenameMode.MOVE,
        ['data/data.txt', 'data1/data1.txt'],
        ['data/data.txt', 'data1/data1.txt'],
    ),
    # Copy each file into a new subdirectory, the originals stay in place.
    (r'(?P<name>data[^/]*.txt)', r'new_dir/\g<name>', RenameHandler.RenameMode.COPY,
        ['data/data.txt', 'data1/data1.txt'],
        ['data/data.txt', 'data1/data1.txt',
         'data/new_dir/data.txt', 'data1/new_dir/data1.txt'],
    ),
    # Two files would end up with the same name: execute must fail.
    (r'data[^/]*/', r'', RenameHandler.RenameMode.SYMLINK,
        ['data/data.txt', 'data1/data1.txt', 'data2/data1.txt'],
        None,
    ),
    (r'(?P<name>data[^/]*.txt)', r'newdir/\g<name>', RenameHandler.RenameMode.SYMLINK,
        ['data/data.txt', 'data1/data1.txt', 'data2/data1.txt'],
        ['data/data.txt', 'data1/data1.txt', 'data2/data1.txt',
         'data/newdir/data.txt', 'data1/newdir/data1.txt',
         'data2/newdir/data1.txt'],
    ),
    (r'data[^/]*/', r'', RenameHandler.RenameMode.MOVE,
        ['data/data.txt', 'data1/data1.txt', 'data1/data2.txt'],
        ['data.txt', 'data1.txt', 'data2.txt'],
    ),
    # An existing file at the destination is overwritten.
    (r'data[12]/', r'data/', RenameHandler.RenameMode.MOVE,
        ['data/data.txt', 'data1/data.txt', 'data1/data2.txt'],
        ['data/data.txt', 'data/data2.txt'],
    ),
    # An existing directory at the destination is replaced by the file.
    (r'replacement$', r'dummypath', RenameHandler.RenameMode.COPY,
        ['dummypath/somedata.tar.gz', 'replacement'],
        ['dummypath', 'replacement'],
    ),
])
def test_renamehandler_from_filename(tmp_path, pattern, repl, mode, files_in, files_out):
    """
    Test that a RenameHandler renames the files in a directory correctly.

    Parameters
    ----------
    tmp_path: `pathlib.Path`
        pytest-provided temporary directory which acts as our working directory.

    pattern:
        The filename pattern that is passed to the RenameHandler.

    repl:
        The replacement pattern that is passed to the RenameHandler.

    mode:
        The renaming mode that is passed to the RenameHandler.

    files_in:
        List of files that are initially placed in the working directory.

    files_out:
        List of files that are expected to be in the working directory after
        executing the RenameHandler.
        If files_out is None, the test expects the execute command to fail.

    """
    handler = RenameHandler(pattern, repl, mode)

    # Create the initial files in the working directory.
    for f in files_in:
        (tmp_path/f).parent.mkdir(parents=True, exist_ok=True)
        (tmp_path/f).touch()

    if files_out is None:
        with pytest.raises(Exception):
            handler.execute(tmp_path)
        return

    handler.execute(tmp_path)

    # Count the number of files in the working directory and make sure that
    # this number is equal to len(files_out).
    n_out = len([f for f in tmp_path.rglob('*') if not f.is_dir()])
    assert n_out == len(files_out)

    for f in files_out:
        assert (tmp_path/f).exists()