Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mobt 2066 convert forecast to climatological anomaly #2072

Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
ab4fb61
Recreated checksums
maxwhitemet Dec 19, 2024
44b252a
Added the requested functionality to convert a forecast to climatolog…
maxwhitemet Dec 31, 2024
e6b3209
Removed whitespace from files.
maxwhitemet Dec 31, 2024
80abfb5
Resolved test coverage issues Codecov highlighted.
maxwhitemet Dec 31, 2024
d93f1a2
Resolved remaining test coverage issues.
maxwhitemet Dec 31, 2024
b0a4f44
Resolved formatting issues.
maxwhitemet Dec 31, 2024
f6932e4
Partial commit. Switching branches to work on another ticket.
maxwhitemet Jan 22, 2025
badeead
Implemented points from previous review, including compliance with CF…
maxwhitemet Jan 24, 2025
9fbe175
Resolved failure of pre-commit checks due to comparing to 'None' usi…
maxwhitemet Jan 24, 2025
508f934
Resolved warning from pre-commit checks that 'expected_attributes' va…
maxwhitemet Jan 24, 2025
1481214
Added new pytest tests for site data and replaced a test using 'pytes…
maxwhitemet Jan 31, 2025
29f9abe
Added unit test for raising an error when the time bounds of mean cub…
maxwhitemet Jan 31, 2025
e591ea8
Resolved accidental reuse of a test name
maxwhitemet Jan 31, 2025
6fcaf07
Added missing unit test highlighted by codecov
maxwhitemet Feb 3, 2025
50f0fce
Ensured all redundant comments removed and added a new test for when …
maxwhitemet Feb 5, 2025
9ab6d30
Increased readability of unit tests and resolved new test failures pr…
maxwhitemet Feb 10, 2025
61f3b3e
Adding the missing test file
maxwhitemet Feb 10, 2025
0a25409
Adjusted unit tests following review
maxwhitemet Feb 14, 2025
a41cddd
Implemented unit test changes suggested in the latest first review
maxwhitemet Feb 17, 2025
e626804
Standardised check that a reference epoch coordinate is present, whic…
maxwhitemet Feb 17, 2025
20364a7
Fixed double hashes for comments and simplified testing of reference …
maxwhitemet Feb 18, 2025
243eeb2
Removed references to the diagnostic_cube in the _add_reference_epoch…
maxwhitemet Feb 18, 2025
0806ffb
Resolved reformatting requests in previous review
maxwhitemet Feb 19, 2025
fc31ff5
Aimed to fix checksums issue and remove whitespace
maxwhitemet Feb 19, 2025
4f42ef3
Reverted mistaken changes to test_mathematical_operations.py
maxwhitemet Feb 20, 2025
743d139
Changed all instances of 'standard anomaly' with 'standardized anomal…
maxwhitemet Feb 20, 2025
93cecae
Removed whitespace
maxwhitemet Feb 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
246 changes: 246 additions & 0 deletions improver/utilities/mathematical_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import iris
import numpy as np
import numpy.ma as ma
from iris.coords import CellMethod
from iris.cube import Cube
from numpy import ndarray

Expand All @@ -17,6 +18,7 @@
create_new_diagnostic_cube,
generate_mandatory_attributes,
)
from improver.utilities.cube_checker import spatial_coords_match
from improver.utilities.cube_manipulation import (
enforce_coordinate_ordering,
get_dim_coord_names,
Expand Down Expand Up @@ -404,3 +406,247 @@ def fast_linear_fit(

intercept = y_mean - grad * x_mean
return grad, intercept


class CalculateClimateAnomalies(BasePlugin):
    """Utility functionality to convert an input cube of data to a cube containing
    anomaly data. If a forecast and a climatological mean are supplied, the anomaly
    calculated will be forecast - mean. If the climatological standard deviation is
    also supplied, then a standardised anomaly is calculated as
    (forecast - mean) / standard deviation"""

    def __init__(
        self,
        ignore_temporal_mismatch: bool = False,
    ) -> None:
        """
        Initialise class.

        Args:
            ignore_temporal_mismatch:
                If True, ignore mismatch in time coordinates between
                the input cubes. Default is False. Set to True when interested
                in the anomaly of a diagnostic cube relative to mean and
                standard deviation cubes of a different period.
        """
        self.ignore_temporal_mismatch = ignore_temporal_mismatch

    @staticmethod
    def verify_units_match(
        diagnostic_cube: Cube, mean_cube: Cube, std_cube: Optional[Cube] = None
    ) -> None:
        """Check that all cubes have the same units, e.g. to prevent accidental
        use of cubes with rate data alongside cubes with accumulation data.

        Raises:
            ValueError: If the units of the mean cube and/or the standard
                deviation cube do not match those of the diagnostic cube. All
                mismatches are reported together in a single error.
        """
        errors = []
        if mean_cube.units != diagnostic_cube.units:
            errors.append(
                "The mean cube must have the same units as the diagnostic cube. "
                f"The following units were found: {mean_cube.units}, "
                f"{diagnostic_cube.units}"
            )
        # Compare unit objects directly (not via str) so this check is
        # consistent with the mean cube check above.
        if std_cube is not None and std_cube.units != diagnostic_cube.units:
            errors.append(
                "The standard deviation cube must have the same units "
                "as the diagnostic cube. "
                f"The following units were found: {std_cube.units}, "
                f"{diagnostic_cube.units}"
            )

        if errors:
            raise ValueError("\n".join(errors))

    @staticmethod
    def verify_spatial_coords_match(
        diagnostic_cube: Cube, mean_cube: Cube, std_cube: Optional[Cube] = None
    ) -> None:
        """Check that all cubes have the same spatial coordinates (i.e. the same grid
        coordinates or spot index coordinates).

        Raises:
            ValueError: If a spot_index coordinate is present on some but not
                all cubes, if the spot_index coordinates differ between cubes,
                or if the grid coordinates do not match.
        """
        cubes_to_check = [
            cube for cube in [diagnostic_cube, mean_cube, std_cube] if cube is not None
        ]
        # Determine whether spot_index is present on all, some or none of the
        # cubes; a mixed result (set of both True and False) is an error.
        spot_index_present = {
            "spot_index" in get_dim_coord_names(cube) for cube in cubes_to_check
        }
        if len(spot_index_present) > 1:
            raise ValueError(
                "The cubes must all have the same spatial coordinates. Some cubes "
                "contain spot_index coordinates and some do not."
            )
        if spot_index_present.pop():
            # Site data: all cubes must share identical spot_index points.
            if any(
                not np.array_equal(
                    cube.coord("spot_index").points,
                    diagnostic_cube.coord("spot_index").points,
                )
                for cube in cubes_to_check
            ):
                raise ValueError(
                    "Mismatching spot_index coordinates were found on the input cubes."
                )
        elif not spatial_coords_match(cubes_to_check):
            raise ValueError("The spatial coordinates must match.")

    def verify_time_coords_match(
        self, diagnostic_cube: Cube, mean_cube: Cube, std_cube: Optional[Cube] = None
    ) -> None:
        """Check that all cubes have compatible time coordinates.

        Raises:
            ValueError: If the mean and standard deviation cubes have differing
                time bounds, or if the diagnostic cube's time points fall
                outside the time bounds of the mean cube (the latter check is
                skipped when ``ignore_temporal_mismatch`` is True).
        """
        errors = []

        # Check if mean and standard deviation cubes have the same bounds.
        if std_cube is not None and not np.array_equal(
            mean_cube.coord("time").bounds, std_cube.coord("time").bounds
        ):
            errors.append(
                "The mean and standard deviation cubes must have compatible bounds. "
                "The following bounds were found: "
                f"mean_cube bounds: {mean_cube.coord('time').bounds}, "
                f"std_cube bounds: {std_cube.coord('time').bounds}"
            )

        # Check if the diagnostic cube's time points fall within the bounds of
        # the mean cube. The verification of the standard deviation cube's
        # bounds suitably matching the diagnostic cube's is covered implicitly
        # by the bounds comparison above.
        if not self.ignore_temporal_mismatch:
            diagnostic_max = diagnostic_cube.coord("time").cell(-1).point
            diagnostic_min = diagnostic_cube.coord("time").cell(0).point
            time_units = mean_cube.coord("time").units
            # Convert the numeric bounds to datetime-like objects so they are
            # comparable with the cell points above.
            lower_bound, upper_bound = (
                time_units.num2date(bound)
                for bound in mean_cube.coord("time").bounds[0]
            )
            if not (diagnostic_min >= lower_bound and diagnostic_max <= upper_bound):
                errors.append(
                    "The diagnostic cube's time points must fall within the bounds "
                    "of the mean cube. The following was found: "
                    f"diagnostic cube maximum: {diagnostic_max}, "
                    f"diagnostic cube minimum: {diagnostic_min}, "
                    f"mean cube upper bound: {upper_bound}, "
                    f"mean cube lower bound: {lower_bound}"
                )

        if errors:
            raise ValueError("\n".join(errors))

    @staticmethod
    def calculate_anomalies(
        diagnostic_cube: Cube, mean_cube: Cube, std_cube: Optional[Cube] = None
    ) -> ndarray:
        """Calculate climate anomalies from the input cubes.

        Returns:
            The anomaly data, i.e. diagnostic - mean, additionally divided by
            the standard deviation when a standard deviation cube is supplied.
        """
        anomalies_data = diagnostic_cube.data - mean_cube.data
        if std_cube is not None:
            anomalies_data = anomalies_data / std_cube.data
        return anomalies_data

    @staticmethod
    def _update_cube_name_and_units(
        output_cube: Cube, standard_anomaly: bool = False
    ) -> Cube:
        """Update the name and units of the given output cube based on whether
        it represents a standard anomaly or not. The cube is modified in place
        and also returned.

        Args:
            output_cube:
                The cube to be updated.
            standard_anomaly:
                Flag indicating if the output is a standard anomaly. If True,
                a "_standard_anomaly" suffix is added to the name and the cube
                is made dimensionless (units of "1"), since a standardised
                anomaly is a ratio. If False, an "_anomaly" suffix is added to
                the name and the units are left unchanged.
        """
        suffix = "_standard_anomaly" if standard_anomaly else "_anomaly"

        # Base the new name on name() rather than standard_name so a cube
        # identified only by a long_name (standard_name is None) does not
        # raise a TypeError when the suffix is appended.
        new_name = output_cube.name() + suffix
        try:
            output_cube.standard_name = new_name
        except ValueError:
            # The suffixed name is not a valid CF standard name; record it as
            # a long_name and clear the standard name so that name() reports
            # the anomaly name rather than the original diagnostic name.
            output_cube.standard_name = None
            output_cube.long_name = new_name

        # A standardised anomaly is dimensionless.
        if standard_anomaly:
            output_cube.units = "1"

        return output_cube

    @staticmethod
    def _add_reference_epoch_metadata(output_cube: Cube, mean_cube: Cube) -> None:
        """Add epoch metadata to describe the creation of the anomaly.
        1. Add a scalar coordinate 'reference_epoch' to describe the time period
        over which the climatology was calculated:
            - points, bounds and units inherited from the mean cube,
        2. Add a cell method called 'anomaly' to describe the operation
        that was performed.

        The output_cube is modified in place.
        """
        time_coord = mean_cube.coord("time")
        reference_epoch = iris.coords.AuxCoord(
            points=time_coord.points,
            # coord.bounds is already None when the coordinate is unbounded,
            # so it can be passed through directly.
            bounds=time_coord.bounds,
            long_name="reference_epoch",
            units=time_coord.units,
        )
        output_cube.add_aux_coord(reference_epoch)

        output_cube.add_cell_method(
            CellMethod(method="anomaly", coords=["reference_epoch"])
        )

    def _create_output_cube(
        self,
        diagnostic_cube: Cube,
        mean_cube: Cube,
        std_cube: Optional[Cube] = None,
    ) -> Cube:
        """
        Create a final anomalies cube by copying the diagnostic cube, updating its data
        with the anomalies data, and updating its metadata.
        """
        # Use the diagnostic cube as a template for the output cube.
        output_cube = diagnostic_cube.copy()

        # Update the cube name and units; a supplied standard deviation cube
        # means the output is a standardised anomaly.
        standard_anomaly = std_cube is not None
        self._update_cube_name_and_units(output_cube, standard_anomaly)

        # Create the reference epoch coordinate and cell method.
        self._add_reference_epoch_metadata(output_cube, mean_cube)

        output_cube.data = self.calculate_anomalies(
            diagnostic_cube, mean_cube, std_cube
        )

        return output_cube

    def process(
        self,
        diagnostic_cube: Cube,
        mean_cube: Cube,
        std_cube: Optional[Cube] = None,
    ) -> Cube:
        """Calculate anomalies from the input cubes.

        Args:
            diagnostic_cube:
                Cube containing the data to be converted to anomalies.
            mean_cube:
                Cube containing the mean data to be used for the
                calculation of anomalies.
            std_cube:
                Cube containing the standard deviation data to be used for the
                calculation of standardised anomalies. If not provided,
                only anomalies (not standardised anomalies) will be
                calculated.
        Returns:
            Cube containing the result of the calculation with metadata reflecting
            the operation.

        Raises:
            ValueError: If the input cubes have mismatched units, spatial
                coordinates or time coordinates.
        """
        self.verify_units_match(diagnostic_cube, mean_cube, std_cube)
        self.verify_spatial_coords_match(diagnostic_cube, mean_cube, std_cube)
        self.verify_time_coords_match(diagnostic_cube, mean_cube, std_cube)

        return self._create_output_cube(diagnostic_cube, mean_cube, std_cube)
Loading