
Merge branch 'master' into mobt127_filter_in_combine
* master:
  Modifies wxcode check_tree utility function to report issues with unreachable nodes (metoppv#1637)
  remove cycle (metoppv#1657)
  Minor edits to remove raising unnecessary warnings. (metoppv#1646)
  Change pandas DataFrame.at to DataFrame.loc (metoppv#1655)
  Adds handling of a model-id-attr to wxcode-modal (metoppv#1634)

# Conflicts:
#	improver_tests/acceptance/SHA256SUMS
MoseleyS committed Feb 9, 2022
2 parents 5bff889 + e154e72 commit a3f8e9c
Showing 16 changed files with 193 additions and 115 deletions.
@@ -12,7 +12,7 @@ The reliability calibration tables returned by this plugin are structured as sho
Auxiliary coordinates:
table_row_name - x - - -
Scalar coordinates:
- cycle_hour: 22
+ forecast_reference_time: 2017-11-11 00:00:00, bound=(2017-11-10 00:00:00, 2017-11-11 00:00:00)
forecast_period: 68400 seconds
Attributes:
institution: Met Office
8 changes: 3 additions & 5 deletions improver/calibration/dataframe_utilities.py
@@ -326,17 +326,15 @@ def _prepare_dataframes(
keep="last",
)
# Sort to ensure a consistent ordering after removing duplicates.
- forecast_df.sort_values(
-     by=["blend_time", "percentile", "wmo_id"], inplace=True, ignore_index=True,
+ forecast_df = forecast_df.sort_values(
+     by=["blend_time", "percentile", "wmo_id"], ignore_index=True,
)

# Remove truth duplicates.
truth_cols = ["diagnostic", "time", "wmo_id"]
truth_df = truth_df.drop_duplicates(subset=truth_cols, keep="last",)
# Sort to ensure a consistent ordering after removing duplicates.
- truth_df.sort_values(
-     by=truth_cols, inplace=True, ignore_index=True,
- )
+ truth_df = truth_df.sort_values(by=truth_cols, ignore_index=True)

# Find the common set of WMO IDs.
common_wmo_ids = sorted(
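Editor's note: the hunk above swaps in-place sorting for reassignment. A minimal sketch of the two forms, using a toy DataFrame rather than the project's forecast table:

    import pandas as pd

    # Toy stand-in for the forecast DataFrame used above (illustration only).
    forecast_df = pd.DataFrame(
        {"blend_time": [2, 1], "percentile": [50, 50], "wmo_id": ["03002", "03772"]}
    )

    # Before: sort in place; sort_values returns None and mutates forecast_df.
    # forecast_df.sort_values(
    #     by=["blend_time", "percentile", "wmo_id"], inplace=True, ignore_index=True,
    # )

    # After: reassign the sorted copy, keeping the call chainable.
    forecast_df = forecast_df.sort_values(
        by=["blend_time", "percentile", "wmo_id"], ignore_index=True
    )
    print(forecast_df)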
3 changes: 1 addition & 2 deletions improver/calibration/ensemble_calibration.py
@@ -1130,7 +1130,7 @@ def mask_cube(cube: Cube, landsea_mask: Cube) -> None:
IndexError: if the cube and landsea_mask shapes are not compatible.
"""
try:
- cube.data[..., ~landsea_mask.data.astype(np.bool)] = np.nan
+ cube.data[..., ~landsea_mask.data.astype(bool)] = np.nan
except IndexError as err:
msg = "Cube and landsea_mask shapes are not compatible. {}".format(err)
raise IndexError(msg)
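Editor's note: np.bool was a deprecated alias for the builtin bool (deprecated in NumPy 1.20, later removed), so the cast now uses the builtin. A minimal sketch with toy arrays standing in for cube.data and landsea_mask.data:

    import numpy as np

    # Toy stand-ins for cube.data and landsea_mask.data (not IMPROVER cubes).
    data = np.ones((2, 3, 3), dtype=np.float32)
    landsea_mask = np.array([[1, 0, 1], [0, 1, 0], [1, 1, 0]])

    # Casting with the builtin bool behaves identically to the old np.bool
    # alias: sea points (mask == 0) are set to NaN across the leading dimension.
    data[..., ~landsea_mask.astype(bool)] = np.nan
    print(np.isnan(data).sum())  # 4 sea points in each of the 2 slices -> 8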
@@ -1380,7 +1380,6 @@ def process(
forecast_var,
number_of_realizations,
)

return coefficients_cubelist


13 changes: 7 additions & 6 deletions improver/calibration/reliability_calibration.py
@@ -233,10 +233,11 @@ def _create_reliability_table_cube(
) -> Cube:
"""
Construct a reliability table cube and populate it with the provided
- data. The returned cube will include a cycle hour coordinate, which
- describes the model cycle hour at which the forecast data was produced.
- It will further include the forecast period, threshold coordinate,
- and spatial coordinates from the forecast cube.
+ data. The returned cube will include a forecast_reference_time
+ coordinate, which will be the maximum range of bounds of the input
+ forecast reference times, with the point value set to the latest
+ of those in the inputs. It will further include the forecast period,
+ threshold coordinate, and spatial coordinates from the forecast cube.
Args:
forecast:
@@ -443,11 +444,11 @@ def process(self, historic_forecasts: Cube, truths: Cube) -> Cube:
whether the data is thresholded below or above a given diagnostic
threshold.
+ `historic_forecasts` and `truths` should have matching validity times.
Args:
historic_forecasts:
A cube containing the historical forecasts used in calibration.
- These are expected to all have a consistent cycle hour, that is
- the hour in the forecast reference time.
truths:
A cube containing the thresholded gridded truths used in
calibration.
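Editor's note: a small worked illustration of the forecast_reference_time handling described in the updated docstring above, using plain datetimes rather than iris coordinates (the input times are assumed, chosen to match the documentation example):

    from datetime import datetime

    # Hypothetical forecast_reference_time points from the input forecasts.
    frts = [
        datetime(2017, 11, 10, 0, 0),
        datetime(2017, 11, 10, 12, 0),
        datetime(2017, 11, 11, 0, 0),
    ]

    # Bounds span the full range of the inputs; the point is the latest input.
    bounds = (min(frts), max(frts))
    point = max(frts)
    print(point)                       # 2017-11-11 00:00:00
    print(bounds[0], "->", bounds[1])  # 2017-11-10 00:00:00 -> 2017-11-11 00:00:00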
10 changes: 5 additions & 5 deletions improver/cli/wxcode.py
@@ -63,11 +63,11 @@ def process(
the decision tree. It will only be used if the decision tree
provided has threshold values defined with an associated period.
check_tree (bool):
- If set the decision tree will be checked to see if it conforms to
- the expected format; the only other argument required is the path
- to the decision tree. If the tree is found to be valid the required
- inputs will be listed. Setting this flag will prevent the CLI
- performing any other actions.
+ If set, the decision tree will be checked to see if it conforms to
+ the expected format and that all nodes can be reached; the only other
+ argument required is the path to the decision tree. If the tree is found
+ to be valid the required inputs will be listed. Setting this flag will
+ prevent the CLI performing any other actions.
Returns:
iris.cube.Cube:
8 changes: 6 additions & 2 deletions improver/cli/wxcode_modal.py
@@ -36,7 +36,7 @@

@cli.clizefy
@cli.with_output
- def process(*cubes: cli.inputcube):
+ def process(*cubes: cli.inputcube, model_id_attr: str = None):
"""Generates a modal weather symbol for the period covered by the input
weather symbol cubes. Where there are different weather codes available
for night and day, the modal code returned is always a day code, regardless
@@ -46,6 +46,10 @@ def process(*cubes: cli.inputcube):
cubes (iris.cube.CubeList):
A cubelist containing weather symbols cubes that cover the period
over which a modal symbol is desired.
+ model_id_attr (str):
+     Name of attribute recording source models that should be
+     inherited by the output cube. The source models are expected as
+     a space-separated string.
Returns:
iris.cube.Cube:
@@ -56,4 +60,4 @@ def process(*cubes: cli.inputcube):
if not cubes:
raise RuntimeError("Not enough input arguments. See help for more information.")

- return ModalWeatherCode()(cubes)
+ return ModalWeatherCode(model_id_attr=model_id_attr)(cubes)
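Editor's note: the plugin behind this CLI can also be driven directly from Python; the attribute name below is an assumed example, not something mandated by this change:

    from improver.wxcode.modal_code import ModalWeatherCode

    # "mosg__model_configuration" is an illustrative attribute name; any
    # attribute recording source models as a space-separated string works.
    plugin = ModalWeatherCode(model_id_attr="mosg__model_configuration")
    # modal_cube = plugin(weather_symbol_cubes)  # cubes loaded elsewhere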
2 changes: 1 addition & 1 deletion improver/synthetic_data/set_up_test_cubes.py
@@ -260,7 +260,7 @@ def _create_dimension_coord(

coord_array = np.array(coord_array)

- if issubclass(coord_array.dtype.type, np.float):
+ if issubclass(coord_array.dtype.type, float):
# option needed for realizations percentile & probability cube setup
# and heights coordinate
coord_array = coord_array.astype(np.float32)
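Editor's note: as with np.bool above, np.float was a deprecated alias for the builtin float, so the dtype check now uses the builtin. A short sketch with a toy coordinate array (not the project's cube setup code):

    import numpy as np

    coord_array = np.array([1.5, 2.5, 3.5])

    # np.float64 is a subclass of the builtin float, so the check behaves as
    # before and the array is still down-cast to float32.
    if issubclass(coord_array.dtype.type, float):
        coord_array = coord_array.astype(np.float32)

    print(coord_array.dtype)  # float32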
24 changes: 22 additions & 2 deletions improver/wxcode/modal_code.py
@@ -73,10 +73,20 @@ class ModalWeatherCode(BasePlugin):
covered by the input files.
"""

- def __init__(self):
-     """Create an aggregator instance for reuse"""
+ def __init__(self, model_id_attr: str = None):
+     """
+     Set up plugin and create an aggregator instance for reuse
+     Args:
+         model_id_attr:
+             Name of attribute recording source models that should be
+             inherited by the output cube. The source models are expected as
+             a space-separated string.
+     """
self.aggregator_instance = Aggregator("mode", self.mode_aggregator)

+ self.model_id_attr = model_id_attr

# Create the expected cell method for use with single cube inputs
# that do not pass through the aggregator.
self.mode_cell_method = iris.coords.CellMethod("mode", coords="time")
@@ -205,6 +215,16 @@ def process(self, cubes: CubeList) -> Cube:
result = cube.collapsed("time", self.aggregator_instance)
self._set_blended_times(result)

+ if self.model_id_attr:
+     # Update contributing models
+     contributing_models = set()
+     for source_cube in cubes:
+         for model in source_cube.attributes[self.model_id_attr].split(" "):
+             contributing_models.update([model])
+     result.attributes[self.model_id_attr] = " ".join(
+         sorted(list(contributing_models))
+     )

# Handle any unset points where it was hard to determine a suitable mode
if (result.data == UNSET_CODE_INDICATOR).any():
self._group_codes(result, cube)
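Editor's note: a toy illustration of the attribute-merging logic added to process above; each input cube's model_id_attr value is split on spaces, de-duplicated, and written back as a sorted, space-separated string (the model names here are invented):

    # Stand-ins for the model_id_attr attribute values of the input cubes.
    source_attrs = ["uk_det uk_ens", "uk_ens", "nc_det uk_det"]

    contributing_models = set()
    for attr in source_attrs:
        contributing_models.update(attr.split(" "))

    merged = " ".join(sorted(contributing_models))
    print(merged)  # nc_det uk_det uk_ens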
9 changes: 9 additions & 0 deletions improver/wxcode/utilities.py
@@ -34,6 +34,7 @@
from typing import Any, Dict, List, Optional

import iris
+ import numpy as np
from iris.cube import Cube

REQUIRED_KEY_WORDS = [
@@ -393,6 +394,10 @@ def check_tree(
raise ValueError("Decision tree is not a dictionary")

issues = []
+ start_node = list(wxtree.keys())[0]
+ all_targets = np.array(
+     [(n["if_true"], n["if_false"]) for n in wxtree.values()]
+ ).flatten()
wxtree = update_tree_thresholds(wxtree, target_period)
valid_codes = list(WX_DICT.keys())

@@ -403,6 +408,10 @@
if entry not in all_key_words:
issues.append(f"Node {node} contains unknown key '{entry}'")

+ # Check that this node is reachable, or is the start_node
+ if not ((node == start_node) or node in all_targets):
+     issues.append(f"Unreachable node '{node}'")

# Check that if_diagnostic_missing key points at a if_true or if_false
# node
if "if_diagnostic_missing" in items:
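Editor's note: a minimal sketch of the reachability check added to check_tree above, run on an invented three-node tree (real IMPROVER decision trees carry further keys such as thresholds and diagnostic names):

    import numpy as np

    wxtree = {
        "lightning": {"if_true": "heavy_rain", "if_false": "heavy_rain"},
        "heavy_rain": {"if_true": 15, "if_false": 1},
        "orphan": {"if_true": 2, "if_false": 3},  # never targeted by another node
    }

    # Every node must either be the start node or appear as an if_true/if_false
    # target of some other node, otherwise it can never be evaluated.
    start_node = list(wxtree.keys())[0]
    all_targets = np.array(
        [(n["if_true"], n["if_false"]) for n in wxtree.values()]
    ).flatten()

    issues = [
        f"Unreachable node '{node}'"
        for node in wxtree
        if not (node == start_node or node in all_targets)
    ]
    print(issues)  # ["Unreachable node 'orphan'"]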