ecmwf-ifs · johannesbulin · Nov 28, 2024
diff --git a/ifsbench/data/__init__.py b/ifsbench/data/__init__.py
@@ -0,0 +1,11 @@
+# (C) Copyright 2020- ECMWF.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from .datahandler import * # noqa
+from .extracthandler import * # noqa
+from .namelisthandler import * # noqa
+from .renamehandler import * # noqa
diff --git a/ifsbench/data/datahandler.py b/ifsbench/data/datahandler.py
@@ -0,0 +1,49 @@
+# (C) Copyright 2020- ECMWF.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from abc import ABC, abstractmethod
+
+__all__ = ['DataHandler']
+
+class DataHandler(ABC):
+    """
+    Base class for data pipeline steps. Each DataHandler object describes one
+    step in the data pipeline. Multiple DataHandler objects can be executed
+    sequentially to perform specific data setup tasks.
+    """
+
+
+    @abstractmethod
+    def execute(self, wdir, **kwargs):
+        """
+        Run this data handling operation in a given directory.
+
+        Parameters
+        ----------
+        wdir    : str or :any:`pathlib.Path`
+            The directory where the data handling should take place. Subclasses
+            should operate relative to it, unless absolute paths are given.
+        **kwargs :
+            Additional keyword arguments; this base implementation ignores them.
+        """
+        return NotImplemented
+
+    # @abstractmethod
+    # def to_dict(self):
+    #     """
+    #     Convert this object to a dictionary. Its class name and module name
+    #     should be given as the "class" and "module" entry, respectively.
+    #     """
+    #     return NotImplemented
+
+    # @classmethod
+    # @abstractmethod
+    # def from_dict(cls, data):
+    #     """
+    #     Convert a dictionary to an object of this type (inverse of "to_dict").
+    #     """
+    #     return NotImplemented
diff --git a/ifsbench/data/extracthandler.py b/ifsbench/data/extracthandler.py
@@ -0,0 +1,53 @@
+# (C) Copyright 2020- ECMWF.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+import pathlib
+import shutil
+
+from .datahandler import DataHandler
+from ..logging import debug
+
+__all__ = ['ExtractHandler']
+
+
+class ExtractHandler(DataHandler):
+    """
+    DataHandler that extracts a given archive to a specific directory.
+    """
+
+    def __init__(self, archive_path, target_dir=None):
+        """
+        Initialise the handler.
+
+        Parameters
+        ----------
+        archive_path: str or `pathlib.Path`
+            The path to the archive that will be extracted. If a relative path
+            is given, this will be relative to the `wdir` argument in `execute`.
+
+        target_dir: str, `pathlib.Path` or `None`
+            The directory to where the archive will be unpacked. If a relative path
+            is given, this will be relative to the `wdir` argument in `execute`.
+            If None is given, the archive will be extracted to `wdir`.
+        """
+
+        self._archive_path = pathlib.Path(archive_path)
+        # None is kept as-is and means "unpack directly into wdir".
+        self._target_dir = None if target_dir is None else pathlib.Path(target_dir)
+
+    def execute(self, wdir, **kwargs):
+        """
+        Unpack the archive; relative paths are resolved against `wdir`.
+        """
+        wdir = pathlib.Path(wdir)
+
+        # Joining an absolute path onto wdir yields that absolute path, so
+        # one expression covers both the relative and the absolute case.
+        archive_path = wdir/self._archive_path
+        target_dir = wdir if self._target_dir is None else wdir/self._target_dir
+        debug(f"Unpack archive {archive_path} to {target_dir}.")
+        shutil.unpack_archive(archive_path, target_dir)
diff --git a/ifsbench/data/namelisthandler.py b/ifsbench/data/namelisthandler.py
@@ -0,0 +1,167 @@
+# (C) Copyright 2020- ECMWF.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from enum import auto, Enum
+import pathlib
+
+import f90nml
+
+from .datahandler import DataHandler
+from ..logging import debug, info
+
+
+__all__ = ['NamelistOverride', 'NamelistHandler']
+
+
+class NamelistOverride:
+    """
+    A single modification (set, append or delete) of one namelist entry.
+    """
+    class NamelistOperation(Enum):
+        SET = auto()
+        APPEND = auto()
+        DELETE = auto()
+
+    def __init__(self, key, mode, value=None):
+        """
+        Parameters
+        ----------
+        key: str or iterable of str
+            Identifies the entry to modify: either one string in which '/'
+            separates namelist name and entry key, or an iterable holding
+            exactly these two strings.
+
+        mode: NamelistOverride.NamelistOperation
+            The operation to perform:
+                * SET: assign `value` to the entry.
+                * APPEND: append `value` to an array entry.
+                * DELETE: remove the entry.
+
+        value:
+            The value to set or append. Must not be None for SET and APPEND.
+
+        Raises
+        ------
+        ValueError
+            If `key` does not consist of exactly two parts, or if `value`
+            is None for a SET or APPEND operation.
+        """
+        self._keys = key.split('/') if isinstance(key, str) else tuple(key)
+        if len(self._keys) != 2:
+            raise ValueError("The key object must be of length two.")
+
+        self._mode = mode
+        self._value = value
+        requires_value = (self.NamelistOperation.SET, self.NamelistOperation.APPEND)
+        if value is None and mode in requires_value:
+            raise ValueError("The new value must not be None!")
+
+    def apply(self, namelist):
+        """
+        Perform the stored operation on a namelist, modifying it in place.
+
+        Parameters
+        ----------
+        namelist: f90nml.Namelist
+            The namelist to which the changes are applied.
+
+        Raises
+        ------
+        ValueError
+            If APPEND targets an entry without an `append` method, or if the
+            value's type differs from that of the first existing array item.
+        """
+        ops = self.NamelistOperation
+        group_name, key = self._keys[0], self._keys[-1]
+        if group_name not in namelist:
+            # A missing group has nothing to delete; otherwise create it.
+            if self._mode == ops.DELETE:
+                return
+            namelist[group_name] = {}
+        group = namelist[group_name]
+
+        if self._mode == ops.SET:
+            debug(f"Set namelist entry {str(self._keys)} = {str(self._value)}.")
+            group[key] = self._value
+            return
+
+        if self._mode == ops.DELETE:
+            if key in group:
+                debug(f"Delete namelist entry {str(self._keys)}.")
+                del group[key]
+            return
+
+        if self._mode != ops.APPEND:
+            return
+
+        if key not in group:
+            group[key] = []
+        entries = group[key]
+        if not hasattr(entries, 'append'):
+            raise ValueError("Values can only be appended to arrays!")
+
+        # f90nml doesn't seem to type-check arrays, so refuse mixed types here.
+        if len(entries) > 0 and type(entries[0]) != type(self._value):
+            raise ValueError("The given value must have the same type as existing array entries!")
+        debug(f"Append {str(self._value)} to namelist entry {str(self._keys)}.")
+        entries.append(self._value)
+
+class NamelistHandler(DataHandler):
+    """
+    DataHandler specialisation that can modify Fortran namelists.
+    """
+
+    def __init__(self, input_path, output_path, overrides):
+        """
+        Store the namelist locations and the overrides to apply.
+
+        Parameters
+        ----------
+        input_path: str or `pathlib.Path`
+            Namelist file to read. A relative path is interpreted relative
+            to the `wdir` argument of `execute`.
+
+        output_path: str or `pathlib.Path`
+            File the modified namelist is written to. A relative path is
+            interpreted relative to the `wdir` argument of `execute`.
+            None is not supported here (`pathlib.Path(None)` raises).
+
+        overrides: iterable of NamelistOverride
+            The NamelistOverrides that will be applied, in the given order.
+        """
+
+        self._input_path = pathlib.Path(input_path)
+        self._output_path = pathlib.Path(output_path)
+        self._overrides = list(overrides)
+
+        # Reject anything that is not a NamelistOverride up front.
+        if not all(isinstance(entry, NamelistOverride) for entry in self._overrides):
+            raise ValueError("Namelist overrides must be NamelistOverride objects!")
+
+    def execute(self, wdir, **kwargs):
+        """
+        Read the input namelist, apply all overrides and write the result.
+        Emits an `info` message and returns if the input file does not exist.
+        """
+        input_path = self._input_path
+        if not input_path.is_absolute():
+            input_path = wdir/input_path
+
+        if not input_path.exists():
+            info(f"Namelist {input_path} doesn't exist.")
+            return
+
+        output_path = self._output_path
+        if not output_path.is_absolute():
+            output_path = wdir/output_path
+
+        debug(f"Modify namelist {input_path}.")
+        namelist = f90nml.read(input_path)
+        for override in self._overrides:
+            override.apply(namelist)
+
+        namelist.write(output_path, force=True)
diff --git a/ifsbench/data/renamehandler.py b/ifsbench/data/renamehandler.py
@@ -0,0 +1,114 @@
+# (C) Copyright 2020- ECMWF.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+from enum import auto, Enum
+from pathlib import Path
+import re
+import shutil
+
+from .datahandler import DataHandler
+from ..logging import debug
+
+__all__ = ['RenameHandler']
+
+class RenameHandler(DataHandler):
+    """
+    DataHandler specialisation that can move/rename files by using regular
+    expressions (as in re.sub).
+    """
+
+    class RenameMode(Enum):
+        """
+        Enumeration of available rename operations.
+
+        Attributes
+        ----------
+        COPY :
+            Copy the file from its current place to the new location.
+        SYMLINK :
+            Create a symlink in the new location, pointing to its current
+            location.
+        MOVE :
+            Move the file from its current place to the new location.
+        """
+        COPY = auto()
+        SYMLINK = auto()
+        MOVE = auto()
+
+    def __init__(self, pattern, repl, mode=RenameMode.SYMLINK):
+        """
+        Initialise the handler.
+
+        Parameters
+        ----------
+        pattern: str
+            The pattern that will be replaced. Corresponds to `pattern` in
+            `re.sub`. It is applied to each file path relative to `wdir`.
+
+        repl: str
+            The replacement pattern. Corresponds to `repl` in `re.sub`.
+
+        mode:   `RenameHandler.RenameMode`
+            Specifies how the renaming is done (copy, move, symlink).
+        """
+        self._pattern = str(pattern)
+        self._repl = str(repl)
+        self._mode = mode
+
+    def execute(self, wdir, **kwargs):
+        """
+        Rename every file below `wdir` whose relative path is changed by the
+        substitution. All checks run before the first file is touched.
+
+        Raises
+        ------
+        RuntimeError
+            If two files would end up with the same name, or if a destination
+            is itself a file that is going to be renamed.
+        """
+        import os  # local import: this module's import block has no `os`.
+
+        # Resolve wdir so sources and destinations share one absolute prefix.
+        wdir = Path(wdir).resolve()
+
+        path_mapping = {}
+        for f in wdir.rglob('*'):
+            if f.is_dir():
+                continue
+            dest = re.sub(self._pattern, self._repl, str(f.relative_to(wdir)))
+            # Normalise lexically: resolve() would follow an existing symlink
+            # at dest and redirect the rename to the file it points to.
+            dest = Path(os.path.normpath(wdir/dest))
+            if f != dest:
+                path_mapping[f] = dest
+
+        if len(path_mapping) != len(set(path_mapping.values())):
+            raise RuntimeError("Renaming would cause two different files to be given the same name!")
+
+        for source, dest in path_mapping.items():
+            if dest in path_mapping:
+                raise RuntimeError(f"Can't move {source} to {dest} as there is a cyclical dependency!")
+
+        for source, dest in path_mapping.items():
+            # exists() is False for a dangling symlink, so test is_symlink too.
+            if dest.is_symlink() or dest.exists():
+                debug(f"Delete existing file/directory {dest} before renaming.")
+                if dest.is_dir() and not dest.is_symlink():
+                    shutil.rmtree(dest)
+                else:
+                    dest.unlink()
+            dest.parent.mkdir(parents=True, exist_ok=True)
+
+            if self._mode == self.RenameMode.COPY:
+                debug(f"Copy {source} to {dest}.")
+                shutil.copy(source, dest)
+            elif self._mode == self.RenameMode.SYMLINK:
+                debug(f"Symlink {source} to {dest}.")
+                dest.symlink_to(source)
+            elif self._mode == self.RenameMode.MOVE:
+                debug(f"Move {source} to {dest}.")
+                source.rename(dest)