From 77b77d3c63bd4f0c74560c1425e610ccb13ce8ac Mon Sep 17 00:00:00 2001
From: Johannes Bulin <johannes.bulin@ecmwf.int>
Date: Thu, 28 Nov 2024 14:41:21 +0000
Subject: [PATCH] Implemented DataHandler class and extract/remove/namelist
 subclasses (#6).

---
 ifsbench/data/__init__.py          |  11 ++
 ifsbench/data/datahandler.py       |  49 ++++++
 ifsbench/data/extracthandler.py    |  53 ++++++
 ifsbench/data/namelisthandler.py   | 167 ++++++++++++++++++
 ifsbench/data/renamehandler.py     | 114 ++++++++++++
 tests/data/test_extracthandler.py  | 148 ++++++++++++++++
 tests/data/test_namelisthandler.py | 273 +++++++++++++++++++++++++++++
 tests/data/test_renamehandler.py   |  97 ++++++++++
 8 files changed, 912 insertions(+)
 create mode 100644 ifsbench/data/__init__.py
 create mode 100644 ifsbench/data/datahandler.py
 create mode 100644 ifsbench/data/extracthandler.py
 create mode 100644 ifsbench/data/namelisthandler.py
 create mode 100644 ifsbench/data/renamehandler.py
 create mode 100644 tests/data/test_extracthandler.py
 create mode 100644 tests/data/test_namelisthandler.py
 create mode 100644 tests/data/test_renamehandler.py

diff --git a/ifsbench/data/__init__.py b/ifsbench/data/__init__.py
new file mode 100644
index 0000000..ef939b6
--- /dev/null
+++ b/ifsbench/data/__init__.py
@@ -0,0 +1,11 @@
+# (C) Copyright 2020- ECMWF.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from .datahandler import * # noqa
+from .extracthandler import * # noqa
+from .namelisthandler import * # noqa
+from .renamehandler import * # noqa
diff --git a/ifsbench/data/datahandler.py b/ifsbench/data/datahandler.py
new file mode 100644
index 0000000..a45fc25
--- /dev/null
+++ b/ifsbench/data/datahandler.py
@@ -0,0 +1,49 @@
+# (C) Copyright 2020- ECMWF.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from abc import ABC, abstractmethod
+
+__all__ = ['DataHandler']
+
+class DataHandler(ABC):
+    """
+    Base class for data pipeline steps. Each DataHandler object describes one
+    step in the data pipeline. Multiple DataHandler objects can be executed
+    sequentially to perform specific data setup tasks.
+    """
+
+    @abstractmethod
+    def execute(self, wdir, **kwargs):
+        """
+        Run this data handling operation in a given directory.
+
+        Parameters
+        ----------
+        wdir    : str or :any:`pathlib.Path`
+            The directory where the data handling should take place. Subclasses
+            should operate relative to it, unless absolute paths are given.
+        **kwargs :
+            Additional keyword arguments; not used by this base method.
+        """
+        return NotImplemented
+
+    # @abstractmethod
+    # def to_dict(self):
+    #     """
+    #     Convert this object to a dictionary. Its class name and module name
+    #     should be given as the "class" and "module" entry, respectively.
+    #     """
+    #     return NotImplemented
+
+    # @classmethod
+    # @abstractmethod
+    # def from_dict(cls, data):
+    #     """
+    #     Convert a dictionary to an object of this type. This is the inverse
+    #     function to "to_dict".
+    #     """
+    #     return NotImplemented
diff --git a/ifsbench/data/extracthandler.py b/ifsbench/data/extracthandler.py
new file mode 100644
index 0000000..222ef3b
--- /dev/null
+++ b/ifsbench/data/extracthandler.py
@@ -0,0 +1,53 @@
+# (C) Copyright 2020- ECMWF.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+import pathlib
+import shutil
+
+from .datahandler import DataHandler
+from ..logging import debug
+
+__all__ = ['ExtractHandler']
+
+
+class ExtractHandler(DataHandler):
+    """
+    DataHandler that extracts a given archive to a specific directory.
+    """
+
+    def __init__(self, archive_path, target_dir=None):
+        """
+        Initialise the handler.
+
+        Parameters
+        ----------
+        archive_path: str or `pathlib.Path`
+            The path to the archive that will be extracted. If a relative path
+            is given, this will be relative to the `wdir` argument in `execute`.
+
+        target_dir: str, `pathlib.Path` or `None`
+            The directory to where the archive will be unpacked. If a relative path
+            is given, this will be relative to the `wdir` argument in `execute`.
+            If None is given, the archive will be extracted to `wdir`.
+        """
+        self._archive_path = pathlib.Path(archive_path)
+        self._target_dir = None if target_dir is None else pathlib.Path(target_dir)
+
+    def execute(self, wdir, **kwargs):
+        """
+        Unpack the archive; relative paths are resolved against `wdir`.
+
+        `wdir` may be a str or a `pathlib.Path`; `kwargs` are ignored.
+        """
+        wdir = pathlib.Path(wdir)
+        # Joining with an absolute path discards `wdir`, so the same expression
+        # handles relative and absolute paths alike.
+        archive_path = wdir/self._archive_path
+        target_dir = wdir if self._target_dir is None else wdir/self._target_dir
+
+        debug(f"Unpack archive {archive_path} to {target_dir}.")
+        shutil.unpack_archive(archive_path, target_dir)
diff --git a/ifsbench/data/namelisthandler.py b/ifsbench/data/namelisthandler.py
new file mode 100644
index 0000000..e874740
--- /dev/null
+++ b/ifsbench/data/namelisthandler.py
@@ -0,0 +1,167 @@
+# (C) Copyright 2020- ECMWF.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from enum import auto, Enum
+import pathlib
+
+import f90nml
+
+from .datahandler import DataHandler
+from ..logging import debug, info
+
+
+__all__ = ['NamelistOverride', 'NamelistHandler']
+
+
+class NamelistOverride:
+    """
+    Describe a single change (set, append or delete) to one namelist entry.
+    """
+    class NamelistOperation(Enum):
+        SET = auto()  # Assign a value to an entry.
+        APPEND = auto()  # Append a value to an array entry.
+        DELETE = auto()  # Remove an entry.
+
+    def __init__(self, key, mode, value=None):
+        """
+        Parameters
+        ----------
+        key: str or iterable of str
+            The namelist entry that will be modified. Can be either a string
+            where '/' separates the namelist name and the entry key or an iterable
+            of strings of length two. Any other length raises a ValueError.
+
+        mode: NamelistOverride.NamelistOperation
+            What kind of operation is specified. Can be
+                * SET: Set a certain entry.
+                * APPEND: Append to an array entry.
+                * DELETE: Delete an entry.
+
+        value:
+            The value that is set (SET operation) or appended (APPEND). Must not
+            be None for these two operations (a ValueError is raised otherwise).
+        """
+        if isinstance(key, str):
+            self._keys = key.split('/')
+        else:
+            self._keys = tuple(key)
+
+        if len(self._keys) != 2:
+            raise ValueError("The key object must be of length two.")
+
+        self._mode = mode
+        self._value = value
+
+        if self._value is None:
+            if self._mode in (self.NamelistOperation.SET, self.NamelistOperation.APPEND):
+                raise ValueError("The new value must not be None!")
+
+    def apply(self, namelist):
+        """
+        Apply the stored change to a namelist. The namelist is modified in place.
+
+        Parameters
+        ----------
+        namelist: f90nml.Namelist
+            The namelist to which the change is applied. SET and APPEND create
+            a missing namelist group; DELETE silently ignores missing entries.
+        """
+        if self._keys[0] not in namelist:
+            if self._mode == self.NamelistOperation.DELETE:
+                return
+
+            namelist[self._keys[0]] = {}
+
+        namelist = namelist[self._keys[0]]
+        key = self._keys[-1]
+
+        if self._mode == self.NamelistOperation.SET:
+            debug(f"Set namelist entry {str(self._keys)} = {str(self._value)}.")
+            namelist[key] = self._value
+        elif self._mode == self.NamelistOperation.APPEND:
+            if key not in namelist:
+                namelist[key] = []
+
+            if not hasattr(namelist[key], 'append'):
+                raise ValueError("Values can only be appended to arrays!")
+
+            # f90nml doesn't seem to do any kind of checking, so we could
+            # create arrays in the namelist where the entries have different
+            # types.
+            # This will most likely cause issues, so we verify here, that
+            # the array entries have the same type.
+            if len(namelist[key]) > 0:
+                type_list = type(namelist[key][0])
+                type_value = type(self._value)
+
+                if type_list != type_value:
+                    raise ValueError("The given value must have the same type as existing array entries!")
+
+            debug(f"Append {str(self._value)} to namelist entry {str(self._keys)}.")
+
+            namelist[key].append(self._value)
+
+        elif self._mode == self.NamelistOperation.DELETE:
+            if key in namelist:
+                debug(f"Delete namelist entry {str(self._keys)}.")
+                del namelist[key]
+
+class NamelistHandler(DataHandler):
+    """
+    DataHandler specialisation that can modify Fortran namelists.
+    """
+
+    def __init__(self, input_path, output_path, overrides):
+        """
+        Initialise the handler.
+
+        Parameters
+        ----------
+        input_path: str or `pathlib.Path`
+            The path to the namelist that will be modified. If a relative path
+            is given, this will be relative to the `wdir` argument in `execute`.
+
+        output_path: str or `pathlib.Path`
+            The path to which the updated namelist will be written. If a relative path
+            is given, this will be relative to the `wdir` argument in `execute`.
+
+        overrides: iterable of NamelistOverride
+            The NamelistOverrides that will be applied. A ValueError is raised
+            if one of them is not a NamelistOverride object.
+        """
+        self._input_path = pathlib.Path(input_path)
+        self._output_path = pathlib.Path(output_path)
+
+        self._overrides = list(overrides)
+        if not all(isinstance(override, NamelistOverride) for override in self._overrides):
+            raise ValueError("Namelist overrides must be NamelistOverride objects!")
+
+    def execute(self, wdir, **kwargs):
+        """
+        Read the input namelist, apply all overrides and write the result.
+
+        Nothing is written if the input namelist doesn't exist.
+        """
+        wdir = pathlib.Path(wdir)
+        # Joining with an absolute path discards `wdir`, so the same expression
+        # handles relative and absolute paths alike.
+        input_path = wdir/self._input_path
+        output_path = wdir/self._output_path
+
+        if not input_path.exists():
+            info(f"Namelist {input_path} doesn't exist.")
+            return
+
+        debug(f"Modify namelist {input_path}.")
+        namelist = f90nml.read(input_path)
+
+        for override in self._overrides:
+            override.apply(namelist)
+
+        # Make sure the output directory exists before writing.
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        namelist.write(output_path, force=True)
diff --git a/ifsbench/data/renamehandler.py b/ifsbench/data/renamehandler.py
new file mode 100644
index 0000000..c0d04ce
--- /dev/null
+++ b/ifsbench/data/renamehandler.py
@@ -0,0 +1,114 @@
+# (C) Copyright 2020- ECMWF.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from enum import auto, Enum
+from pathlib import Path
+import re
+import shutil
+
+from .datahandler import DataHandler
+from ..logging import debug
+
+__all__ = ['RenameHandler']
+
+class RenameHandler(DataHandler):
+    """
+    DataHandler specialisation that can move/rename files by using regular
+    expressions (as in re.sub).
+    """
+
+    class RenameMode(Enum):
+        """
+        Enumeration of available rename operations.
+
+        Attributes
+        ----------
+        COPY :
+            Copy the file from its current place to the new location.
+        SYMLINK :
+            Create a symlink in the new location, pointing to its current
+            location.
+        MOVE :
+            Move the file from its current place to the new location.
+        """
+        COPY = auto()
+        SYMLINK = auto()
+        MOVE = auto()
+
+    def __init__(self, pattern, repl, mode=RenameMode.SYMLINK):
+        """
+        Initialise the handler.
+
+        Parameters
+        ----------
+        pattern: str
+            The pattern that will be replaced. Corresponds to `pattern` in
+            `re.sub`. It is matched against file paths relative to `wdir`.
+
+        repl: str
+            The replacement pattern. Corresponds to `repl` in `re.sub`.
+
+        mode:   `RenameHandler.RenameMode`
+            Specifies how the renaming is done (copy, move, symlink).
+        """
+        self._pattern = str(pattern)
+        self._repl = str(repl)
+        self._mode = mode
+
+    def execute(self, wdir, **kwargs):
+        """
+        Rename all files below `wdir` whose relative path matches the pattern.
+
+        A RuntimeError is raised, before any file is touched, if two files would
+        share a destination or if a destination is itself going to be renamed.
+        """
+        # Resolve wdir so that sources and (resolved) destinations are comparable;
+        # otherwise unchanged files look renamed and get deleted as "existing".
+        wdir = Path(wdir).resolve()
+        # Maps each file that will be renamed to its destination.
+        path_mapping = {}
+
+        for f in wdir.rglob('*'):
+            if f.is_dir():
+                continue
+            rel_path = str(f.relative_to(wdir))
+            new_path = re.sub(self._pattern, self._repl, rel_path)
+            # Files that the pattern leaves untouched are never renamed.
+            if new_path == rel_path:
+                continue
+            dest = (wdir/new_path).resolve()
+            if f != dest:
+                path_mapping[f] = dest
+
+        # Validate all renames up front: two files must not share a destination
+        # and no destination may itself be a file that is going to be renamed.
+        if len(path_mapping) != len(set(path_mapping.values())):
+            raise RuntimeError("Renaming would cause two different files to be given the same name!")
+        for source, dest in path_mapping.items():
+            if dest in path_mapping:
+                raise RuntimeError(f"Can't move {source} to {dest} as there is a cyclical dependency!")
+
+        for source, dest in path_mapping.items():
+            # Delete whatever resides at dest at the moment (whether it's a
+            # file or a directory).
+            if dest.exists():
+                debug(f"Delete existing file/directory {dest} before renaming.")
+                try:
+                    shutil.rmtree(dest)
+                except NotADirectoryError:
+                    dest.unlink()
+            dest.parent.mkdir(parents=True, exist_ok=True)
+
+            if self._mode == self.RenameMode.COPY:
+                debug(f"Copy {source} to {dest}.")
+                shutil.copy(source, dest)
+            elif self._mode == self.RenameMode.SYMLINK:
+                debug(f"Symlink {source} to {dest}.")
+                dest.symlink_to(source)
+            elif self._mode == self.RenameMode.MOVE:
+                debug(f"Move {source} to {dest}.")
+                source.rename(dest)
diff --git a/tests/data/test_extracthandler.py b/tests/data/test_extracthandler.py
new file mode 100644
index 0000000..d3ad6bf
--- /dev/null
+++ b/tests/data/test_extracthandler.py
@@ -0,0 +1,148 @@
+# (C) Copyright 2020- ECMWF.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+"""
+Tests for the ExtractHandler data handler.
+"""
+
+from contextlib import nullcontext
+from pathlib import Path
+import shutil
+
+import pytest
+
+from ifsbench.data import (
+    ExtractHandler
+)
+
+@pytest.mark.parametrize('archive_path,archive_valid', [
+    (Path('somewhere/archive.tar'), True),
+    ('somewhere/archive.tar', True),
+    (None, False),
+    (2, False)
+])
+@pytest.mark.parametrize('target_dir, target_valid', [
+    (Path('somewhere/archive.tar'), True),
+    ('somewhere/archive.tar', True),
+    (None, True),
+    (2, False)
+])
+def test_extracthandler_init(archive_path, archive_valid, target_dir, target_valid):
+    """
+    Check that ExtractHandler only accepts str/Path paths (target_dir may also be None).
+    """
+    if archive_valid and target_valid:
+        context = nullcontext()
+    else:
+        context = pytest.raises(Exception)
+
+    with context:
+        ExtractHandler(archive_path, target_dir)
+
+@pytest.fixture(name='archive')
+def fixture_archive():
+    # Relative paths of the files that make up the test archive.
+    paths = [
+        'data1/file1.txt',
+        'data1/file2.txt',
+        'data2/file1.txt',
+        'data2/file2.txt',
+    ]
+    return paths
+
+
+@pytest.mark.parametrize('archive_path', [
+    Path('somewhere/archive'),
+    'somewhere/archive',
+])
+@pytest.mark.parametrize('archive_relative', [True, False])
+@pytest.mark.parametrize('archive_type', ['zip', 'tar', 'gztar'])
+
+@pytest.mark.parametrize('target_dir', [
+    Path('somewhere/extract'),
+    'somewhere/extract',
+    None,
+])
+@pytest.mark.parametrize('target_relative', [True, False])
+def test_extracthandler_execute(tmp_path, archive, archive_path, archive_relative,
+                                archive_type, target_dir, target_relative):
+    """
+    Test that the execute function moves the content of an archive to the right
+    directory.
+
+        Parameters
+        ----------
+        tmp_path: `pathlib.Path`
+            pytest-provided temporary directory which acts as our working directory.
+
+        archive:
+            Directory structure inside the archive.
+
+        archive_path:
+            Relative path (to tmp_path) where the archive resides, WITHOUT the
+            archive suffix.
+
+        archive_relative:
+            Whether archive_path will be passed to the ExtractHandler as a relative
+            or absolute path.
+
+        archive_type:
+            Which kind of archive is used (see `shutil.make_archive`).
+
+        target_dir:
+            Relative path (to tmp_path) where the data will be extracted to.
+
+        target_relative:
+            Whether target_dir will be passed to the ExtractHandler as a relative
+            or absolute path.
+
+    """
+    # Build the paths that are passed to the ExtractHandler. If the paths
+    # are supposed to be absolute, use tmp_path to build an absolute path.
+    # Also distinguish between str and Path (ExtractHandler should support
+    # both).
+    if not archive_relative:
+        if isinstance(archive_path, str):
+            archive_path = str((tmp_path/archive_path).resolve())
+        else:
+            archive_path = (tmp_path/archive_path).resolve()
+
+    if not target_relative and target_dir is not None:
+        if isinstance(target_dir, str):
+            target_dir = str((tmp_path/target_dir).resolve())
+        else:
+            target_dir = (tmp_path/target_dir).resolve()
+
+    # Build the archive that we will unpack by using pack_path as a directory
+    # that we will compress. Simply touch each file in archive.
+    pack_path = tmp_path/'pack'
+    for path in archive:
+        (pack_path/path).parent.mkdir(parents=True, exist_ok=True)
+        (pack_path/path).touch()
+
+    if Path(archive_path).is_absolute():
+        archive_path = shutil.make_archive(archive_path, archive_type, pack_path)
+    else:
+        archive_path = shutil.make_archive(tmp_path/archive_path, archive_type, pack_path)
+
+    # Actually extract the archive.
+    handler = ExtractHandler(archive_path, target_dir)
+    handler.execute(tmp_path)
+
+    # Build the path where the data should now be. As target_dir may be
+    # a Path, str or None - and absolute or relative - we have to determine
+    # the actual path first.
+    if target_dir is None:
+        extract_path = tmp_path
+    else:
+        extract_path = Path(target_dir)
+
+    if not extract_path.is_absolute():
+        extract_path = tmp_path/extract_path
+
+    for path in archive:
+        assert (extract_path/path).exists()
diff --git a/tests/data/test_namelisthandler.py b/tests/data/test_namelisthandler.py
new file mode 100644
index 0000000..4121263
--- /dev/null
+++ b/tests/data/test_namelisthandler.py
@@ -0,0 +1,273 @@
+# (C) Copyright 2020- ECMWF.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+"""
+Tests for NamelistOverride and NamelistHandler.
+"""
+
+from contextlib import nullcontext
+from pathlib import Path
+
+from f90nml import Namelist
+import pytest
+
+from ifsbench.data import (
+    NamelistHandler, NamelistOverride
+)
+
+@pytest.fixture(name = 'initial_namelist')
+def fixture_namelist():
+    namelist = Namelist()
+    # Two namelist groups; note that 'list' holds both int and str entries.
+    namelist['namelist1'] = {
+        'int': 2,
+        'str': 'test',
+        'list': [2, 3, 'entry']
+    }
+
+    namelist['namelist2'] = {'int': 5}
+
+    return namelist
+
+
+@pytest.mark.parametrize('key,mode,value,success', [
+    ('namelist1', NamelistOverride.NamelistOperation.APPEND, None, False),
+    ('namelist1', NamelistOverride.NamelistOperation.SET, None, False),
+    ('namelist1', NamelistOverride.NamelistOperation.DELETE, None, False),
+    ('namelist1/entry', NamelistOverride.NamelistOperation.DELETE, None, True),
+    ('namelist1/entry', NamelistOverride.NamelistOperation.SET, None, False),
+    ('namelist1/entry', NamelistOverride.NamelistOperation.APPEND, None, False),
+    ('namelist1/entry', NamelistOverride.NamelistOperation.SET, 2, True),
+    ('namelist1/entry', NamelistOverride.NamelistOperation.APPEND, 3, True),
+    (('namelist1', 'entry'), NamelistOverride.NamelistOperation.SET, 2, True),
+    (('namelist1', 'entry'), NamelistOverride.NamelistOperation.APPEND, 3, True),
+])
+def test_extracthandler_init(key, mode, value, success):
+    """
+    Check that NamelistOverride only accepts two-part keys and requires a
+    value for the SET and APPEND operations (a ValueError is expected otherwise).
+    """
+
+    if success:
+        context = nullcontext()
+    else:
+        context = pytest.raises(ValueError)
+
+    with context:
+        NamelistOverride(key, mode, value)
+
+
+@pytest.mark.parametrize('key,value', [
+    (('namelist1', 'int'), 5),
+    (('namelist1', 'list'), [0, 2]),
+    (('namelist2', 'int'), 'not an int'),
+    (('namelist2', 'newvalue'), 5),
+    (('namelist3', 'anothervalue'), [2,3,4]),
+])
+def test_extracthandler_apply_set(initial_namelist, key, value):
+    """
+    Check that SET stores the new value and leaves all other entries untouched.
+    """
+
+    namelist = Namelist(initial_namelist)
+
+    override = NamelistOverride(key, NamelistOverride.NamelistOperation.SET, value)
+
+    override.apply(namelist)
+
+    assert namelist[key[0]][key[1]] == value
+
+    for name, entry in namelist.items():
+        for name2 in entry.keys():
+            if (name, name2) != key:
+                assert entry[name2] == initial_namelist[name][name2]
+
+@pytest.mark.parametrize('key,value,success', [
+    (('namelist1', 'int'), 5, False),
+    (('namelist1', 'list'), 3, True),
+    (('namelist1', 'list'), [2, 4], False),
+    (('namelist1', 'list'), 5, True),
+    (('namelist1', 'list'), 'Hello', False),
+    (('namelist2', 'int'), 'not an int', False),
+    (('namelist3', 'new_list'), 'not an int', True)
+])
+def test_extracthandler_apply_append(initial_namelist, key, value, success):
+    """
+    Check that APPEND extends arrays and rejects scalars or mismatching value types.
+    """
+
+    namelist = Namelist(initial_namelist)
+
+    override = NamelistOverride(key, NamelistOverride.NamelistOperation.APPEND, value)
+
+    if success:
+        override.apply(namelist)
+    else:
+        with pytest.raises(ValueError):
+            override.apply(namelist)
+        return
+
+    if key[0] in initial_namelist and key[1] in initial_namelist[key[0]]:
+        assert namelist[key[0]][key[1]] == initial_namelist[key[0]][key[1]] + [value]
+    else:
+        assert namelist[key[0]][key[1]] == [value]
+
+
+    for name, entry in namelist.items():
+        for name2 in entry.keys():
+            if (name, name2) != key:
+                assert entry[name2] == initial_namelist[name][name2]
+
+
+@pytest.mark.parametrize('key', [
+    ('namelist1', 'int'),
+    ('namelist1', 'list'),
+    ('namelist1', 'list'),
+    ('namelist2', 'int'),
+    ('doesnot', 'exist'),
+    ('namelist1', 'missing'),
+])
+def test_extracthandler_apply_delete(initial_namelist, key):
+    """
+    Check that DELETE removes the entry (if present) and leaves all other entries untouched.
+    """
+
+    namelist = Namelist(initial_namelist)
+
+    override = NamelistOverride(key, NamelistOverride.NamelistOperation.DELETE)
+
+    override.apply(namelist)
+
+    for name, entry in initial_namelist.items():
+        for name2 in entry.keys():
+            if (name, name2) == key:
+                if name in namelist:
+                    assert name2 not in namelist[name]
+            else:
+                assert namelist[name][name2] == initial_namelist[name][name2]
+
+
+@pytest.mark.parametrize('input_path,input_valid', [
+    (Path('somewhere/fort.4'), True),
+    ('somewhere/namelist', True),
+    (None, False),
+    (2, False)
+])
+@pytest.mark.parametrize('output_path,output_valid', [
+    (Path('somewhere/new_fort.4'), True),
+    ('somewhere/namelist', True),
+    (None, False),
+    (2, False)
+])
+@pytest.mark.parametrize('overrides, overrides_valid', [
+    ([], True),
+    ('Test', False),
+    (2, False),
+    ([NamelistOverride('namelist/entry', NamelistOverride.NamelistOperation.SET, 5)], True),
+    ([
+        NamelistOverride('namelist/entry', NamelistOverride.NamelistOperation.SET, 5),
+        NamelistOverride('namelist/entry2', NamelistOverride.NamelistOperation.APPEND, 2),
+        NamelistOverride('namelist/entry', NamelistOverride.NamelistOperation.DELETE),
+
+    ], True),
+])
+def test_namelisthandler_init(input_path, input_valid, output_path, output_valid, overrides, overrides_valid):
+    """
+    Check that NamelistHandler only accepts str/Path paths and iterables of NamelistOverride.
+    """
+    if input_valid and output_valid and overrides_valid:
+        context = nullcontext()
+    else:
+        context = pytest.raises(Exception)
+
+    with context:
+        NamelistHandler(input_path, output_path, overrides)
+
+
+
+@pytest.mark.parametrize('input_path', [
+    Path('somewhere/fort.4'),
+    'somewhere/namelist'
+])
+@pytest.mark.parametrize('input_relative', [True, False])
+@pytest.mark.parametrize('output_path', [
+    Path('somewhere_else/new_fort.4'),
+    'somewhere/namelist',
+])
+@pytest.mark.parametrize('output_relative', [True, False])
+@pytest.mark.parametrize('overrides', [
+    [],
+    [NamelistOverride('namelist/entry', NamelistOverride.NamelistOperation.SET, 5)],
+    [
+        NamelistOverride('namelist/entry', NamelistOverride.NamelistOperation.SET, 5),
+        NamelistOverride('namelist/entry2', NamelistOverride.NamelistOperation.APPEND, 2),
+        NamelistOverride('namelist/entry', NamelistOverride.NamelistOperation.DELETE),
+
+    ],
+])
+
+def test_namelisthandler_execute(tmp_path, initial_namelist, input_path,
+                                 input_relative, output_path, output_relative,
+                                 overrides):
+    """
+    Test that the execute function modifies the namelists correctly.
+
+        Parameters
+        ----------
+        tmp_path: `pathlib.Path`
+            pytest-provided temporary directory which acts as our working directory.
+
+        input_path:
+            Relative path (to tmp_path) where the input namelist resides.
+
+        input_relative:
+            Whether input_path will be passed to the NamelistHandler as a relative
+            or absolute path.
+
+        output_path:
+            Relative path (to tmp_path) to the output namelist.
+
+        output_relative:
+            Whether output_path will be passed to the NamelistHandler as a relative
+            or absolute path.
+
+        overrides:
+            The overrides that are applied.
+
+    """
+    # Build the paths that are passed to the NamelistHandler. If the paths
+    # are supposed to be absolute, use tmp_path to build an absolute path.
+    # Also distinguish between str and Path (NamelistHandler should support
+    # both).
+    if not input_relative:
+        if isinstance(input_path, str):
+            input_path = str((tmp_path/input_path).resolve())
+        else:
+            input_path = (tmp_path/input_path).resolve()
+
+    if not output_relative:
+        if isinstance(output_path, str):
+            output_path = str((tmp_path/output_path).resolve())
+        else:
+            output_path = (tmp_path/output_path).resolve()
+
+    # Create the initial namelist at the input path (not the output path,
+    # otherwise the handler finds no input and silently does nothing).
+    abs_input_path = tmp_path/input_path
+    abs_input_path.parent.mkdir(parents=True, exist_ok=True)
+    initial_namelist.write(abs_input_path)
+
+    # Make sure the output directory exists (it may differ from the input one).
+    abs_output_path = tmp_path/output_path
+    abs_output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    # Apply the overrides.
+    handler = NamelistHandler(input_path, output_path, overrides)
+    handler.execute(tmp_path)
+
+    # The updated namelist must have been written to the output path.
+    assert abs_output_path.exists()
diff --git a/tests/data/test_renamehandler.py b/tests/data/test_renamehandler.py
new file mode 100644
index 0000000..cbbc4e5
--- /dev/null
+++ b/tests/data/test_renamehandler.py
@@ -0,0 +1,97 @@
+# (C) Copyright 2020- ECMWF.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+"""
+Tests for the RenameHandler data handler.
+"""
+
+import pytest
+
+from ifsbench.data import (
+    RenameHandler
+)
+
+@pytest.mark.parametrize('pattern,repl,mode,files_in,files_out', [
+    (r'file', r'data', RenameHandler.RenameMode.MOVE,
+        ['data/data.txt','data1/data1.txt'],
+        ['data/data.txt','data1/data1.txt'],
+    ),
+    (r'(?P<name>data[^/]*.txt)', r'new_dir/\g<name>', RenameHandler.RenameMode.COPY,
+        ['data/data.txt','data1/data1.txt'],
+        ['data/data.txt','data1/data1.txt', 'data/new_dir/data.txt','data1/new_dir/data1.txt'],
+    ),
+    (r'data[^/]*/', r'', RenameHandler.RenameMode.SYMLINK,
+        ['data/data.txt','data1/data1.txt', 'data2/data1.txt'],
+        None,
+    ),
+    (r'(?P<name>data[^/]*.txt)', r'newdir/\g<name>', RenameHandler.RenameMode.SYMLINK,
+        ['data/data.txt','data1/data1.txt', 'data2/data1.txt'],
+        ['data/data.txt','data1/data1.txt', 'data2/data1.txt',
+            'data/newdir/data.txt','data1/newdir/data1.txt',
+            'data2/newdir/data1.txt'],
+    ),
+    (r'data[^/]*/', r'', RenameHandler.RenameMode.MOVE,
+        ['data/data.txt','data1/data1.txt', 'data1/data2.txt'],
+        ['data.txt', 'data1.txt', 'data2.txt'],
+    ),
+    (r'data[12]/', r'data/', RenameHandler.RenameMode.MOVE,
+        ['data/data.txt','data1/data.txt', 'data1/data2.txt'],
+        ['data/data.txt', 'data/data2.txt'],
+    ),
+    (r'replacement$', r'dummypath', RenameHandler.RenameMode.COPY,
+        ['dummypath/somedata.tar.gz','replacement'],
+        ['dummypath', 'replacement'],
+    ),
+])
+def test_renamehandler_from_filename(tmp_path, pattern, repl, mode, files_in, files_out):
+    """
+    Test that RenameHandler.execute renames the files in the working directory.
+
+        Parameters
+        ----------
+        tmp_path: `pathlib.Path`
+            pytest-provided temporary directory which acts as our working directory.
+
+        pattern:
+            The filename pattern that is used.
+
+        repl:
+            The replacement pattern that is passed to the RenameHandler.
+
+        mode:
+            The renaming mode that is passed to the RenameHandler.
+
+        files_in:
+            List of files that are initially placed in the working directory.
+
+        files_out:
+            List of files that are expected to be in the working directory after
+            executing the RenameHandler.
+            If files_out is None, the test expects the execute command to fail.
+
+    """
+    handler = RenameHandler(pattern, repl, mode)
+
+    # Create the initial files in the working directory.
+    for f in files_in:
+        (tmp_path/f).parent.mkdir(parents=True, exist_ok=True)
+        (tmp_path/f).touch()
+
+    if files_out is None:
+        with pytest.raises(Exception):
+            handler.execute(tmp_path)
+        return
+
+    handler.execute(tmp_path)
+
+    # Count the number of files in the working directory and make sure that
+    # this number is equal to len(files_out)
+    n_out = len([f for f in tmp_path.rglob('*') if not f.is_dir()])
+    assert n_out == len(files_out)
+
+    for f in files_out:
+        assert (tmp_path/f).exists()