Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MOBT-211: mosg__model_run attribute handling in weather symbols #1670

Merged
merged 14 commits into from
Feb 18, 2022
Merged
100 changes: 99 additions & 1 deletion improver/blending/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,11 @@
# POSSIBILITY OF SUCH DAMAGE.
"""Utilities to support weighted blending"""

from datetime import datetime
from typing import Dict, List, Optional

import numpy as np
from iris.cube import Cube
from iris.cube import Cube, CubeList
from numpy import int64

from improver.blending import MODEL_BLEND_COORD, MODEL_NAME_COORD
Expand All @@ -44,6 +45,7 @@
)
from improver.metadata.constants.time_types import TIME_COORDS
from improver.metadata.forecast_times import add_blend_time, forecast_period_coord
from improver.utilities.cube_checker import is_model_blended
from improver.utilities.round import round_close
from improver.utilities.temporal import cycletime_to_number

Expand Down Expand Up @@ -225,3 +227,99 @@ def _get_cycletime_point(cube: Cube, cycletime: str) -> int64:
cycletime, time_unit=frt_units, calendar=frt_calendar
)
return round_close(cycletime_point, dtype=np.int64)


def set_record_run_attr(
    cubelist: CubeList, record_run_attr: str, model_id_attr: Optional[str]
) -> None:
    """Set a record_run attribute that records the model identifier and
    forecast reference time of each cube in the cubelist. From the list of
    cubes, pre-existing record_run attributes, model IDs and forecast
    reference times are extracted as required to build a new record_run
    attribute.

    The new attribute is applied to each cube in the cubelist in preparation
    for blending / combining the cubes. The resulting composite product will
    have a record of the contributing models and their associated forecast
    reference times.

    There are three ways this method may work:

      - None of the input cubes have been previously cycle or model blended.
        The model_id_attr argument must be provided to enable the model
        identifiers to be extracted and used in conjunction with the forecast
        reference time to build the record_run attribute.
      - All of the input cubes have been previously cycle or model blended.
        The model_id_attr argument is not required as a new record_run
        attribute will be constructed by combining the existing record_run
        attributes on each input cube.
      - Some of the input cubes have been previously cycle or model blended,
        and some have not. The model_id_attr argument must be provided so
        that those cubes without an existing record_run attribute can be
        interrogated for their model identifier.

    The cubes are modified in place. Each line of the resulting attribute has
    the form "<model id>:<YYYYmmddTHHMMZ>:<blending weight>"; lines are unique
    and sorted.

    Args:
        cubelist:
            Cubes from which to obtain model and cycle information, and to
            which the resulting run record attribute is added.
        record_run_attr:
            The name of the record run attribute that is to be created.
        model_id_attr:
            The name of the attribute that contains the source model
            information. May be None when every input cube already carries
            a record_run_attr.

    Raises:
        ValueError: If model_id_attr is not set and is required to construct
            a new record_run_attr.
        Exception: A cube has previously been model blended but contains no
            record_run_attr.
        Exception: The model_id_attr name provided is not present on one or
            more of the input cubes.
    """
    # Local import: utcfromtimestamp is deprecated (Python 3.12+), and the
    # timezone-aware replacement needs timezone.utc.
    from datetime import timezone

    # model_id_attr is Optional because callers such as ModalWeatherCode
    # default it to None. It is only mandatory when at least one cube has no
    # existing run record to inherit.
    if not model_id_attr and not all(
        record_run_attr in cube.attributes for cube in cubelist
    ):
        raise ValueError(
            f"Not all input cubes contain an existing {record_run_attr} attribute. "
            "A model_id_attr argument must be provided to enable the construction "
            f"of a new {record_run_attr} attribute."
        )

    # A set gives de-duplication for free; ordering is imposed when joining.
    cycle_strings = set()
    for cube in cubelist:
        if record_run_attr in cube.attributes:
            # Previously blended / combined input: inherit its records as-is.
            cycle_strings.update(cube.attributes[record_run_attr].splitlines())
            continue

        if is_model_blended(cube):
            raise Exception(
                "This cube has been through model blending but there is no "
                f"record_run attribute. This indicates cube {cube.name()} has "
                "been previously model blended without recording the cycles "
                "from which data was taken. It is not possible to create a "
                "record_run attribute."
            )

        if model_id_attr not in cube.attributes:
            raise Exception(
                f"Failure to record run information in '{record_run_attr}' "
                "during blend: no model id attribute found in cube. "
                f"Cube attributes: {cube.attributes}"
            )

        # NOTE(review): the point is interpreted as POSIX seconds since
        # 1970-01-01 UTC - confirm the forecast_reference_time units.
        cycle = datetime.fromtimestamp(
            cube.coord("forecast_reference_time").points[0], tz=timezone.utc
        )
        cycle_str = cycle.strftime("%Y%m%dT%H%MZ")

        blending_weight = ""  # TODO: include actual blending weight here.
        cycle_strings.add(
            f"{cube.attributes[model_id_attr]}:{cycle_str}:{blending_weight}"
        )

    run_record = "\n".join(sorted(cycle_strings))
    for cube in cubelist:
        cube.attributes[record_run_attr] = run_record
30 changes: 2 additions & 28 deletions improver/blending/weighted_blend.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
whole dimension."""

import warnings
from datetime import datetime
from typing import List, Optional, Union

import iris
Expand All @@ -45,7 +44,7 @@

from improver import BasePlugin, PostProcessingPlugin
from improver.blending import MODEL_BLEND_COORD, MODEL_NAME_COORD
from improver.blending.utilities import find_blend_dim_coord
from improver.blending.utilities import find_blend_dim_coord, set_record_run_attr
from improver.metadata.constants import FLOAT_DTYPE, PERC_COORD
from improver.metadata.forecast_times import rebadge_forecasts_as_latest_cycle
from improver.utilities.cube_manipulation import (
Expand Down Expand Up @@ -157,31 +156,6 @@ def _create_model_coordinates(self, cubelist: Union[List[Cube], CubeList]) -> No
cube.add_aux_coord(new_model_id_coord)
cube.add_aux_coord(new_model_coord)

def _set_record_run_attr(self, cubelist: CubeList) -> None:
"""Set a model-cycle record attribute if configured."""
cycle_strings = []
for cube in cubelist:
if self.record_run_attr in cube.attributes:
cycle_strings.extend(cube.attributes[self.record_run_attr].splitlines())
continue
cycle = datetime.utcfromtimestamp(
cube.coord("forecast_reference_time").points[0]
)
cycle_str = cycle.strftime("%Y%m%dT%H%MZ")
if self.model_id_attr not in cube.attributes:
raise Exception(
f"Failure to record run information in '{self.record_run_attr}' "
"during blend: no model id attribute found in cube. "
f"Cube attributes: {cube.attributes}"
)
blending_weight = "" # TODO: include actual blending weight here.
cycle_strings.append(
f"{cube.attributes[self.model_id_attr]}:{cycle_str}:{blending_weight}"
)
cycle_strings.sort()
for cube in cubelist:
cube.attributes[self.record_run_attr] = "\n".join(cycle_strings)

@staticmethod
def _remove_blend_time(cube: Cube) -> Cube:
"""If present on input, remove existing blend time coordinate (as this will
Expand Down Expand Up @@ -230,7 +204,7 @@ def process(
)

if self.record_run_attr is not None and self.model_id_attr is not None:
self._set_record_run_attr(cubelist)
set_record_run_attr(cubelist, self.record_run_attr, self.model_id_attr)

if "model" in self.blend_coord:
cubelist = [self._remove_blend_time(cube) for cube in cubelist]
Expand Down
9 changes: 8 additions & 1 deletion improver/cli/wxcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def process(
*cubes: cli.inputcube,
wxtree: cli.inputjson = None,
model_id_attr: str = None,
record_run_attr: str = None,
target_period: int = None,
check_tree: bool = False,
):
Expand All @@ -55,6 +56,9 @@ def process(
Name of attribute recording source models that should be
inherited by the output cube. The source models are expected as
a space-separated string.
record_run_attr:
Name of attribute used to record models and cycles used in
constructing the weather symbols.
target_period:
The period in seconds that the weather symbol being produced should
represent. This should correspond with any period diagnostics, e.g.
Expand Down Expand Up @@ -86,5 +90,8 @@ def process(
raise RuntimeError("Not enough input arguments. See help for more information.")

return WeatherSymbols(
wxtree, model_id_attr=model_id_attr, target_period=target_period
wxtree,
model_id_attr=model_id_attr,
record_run_attr=record_run_attr,
target_period=target_period,
)(CubeList(cubes))
11 changes: 9 additions & 2 deletions improver/cli/wxcode_modal.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@

@cli.clizefy
@cli.with_output
def process(*cubes: cli.inputcube, model_id_attr: str = None):
def process(
*cubes: cli.inputcube, model_id_attr: str = None, record_run_attr: str = None
):
"""Generates a modal weather symbol for the period covered by the input
weather symbol cubes. Where there are different weather codes available
for night and day, the modal code returned is always a day code, regardless
Expand All @@ -50,6 +52,9 @@ def process(*cubes: cli.inputcube, model_id_attr: str = None):
Name of attribute recording source models that should be
inherited by the output cube. The source models are expected as
a space-separated string.
record_run_attr:
Name of attribute used to record models and cycles used in
constructing the weather symbols.

Returns:
iris.cube.Cube:
Expand All @@ -60,4 +65,6 @@ def process(*cubes: cli.inputcube, model_id_attr: str = None):
if not cubes:
raise RuntimeError("Not enough input arguments. See help for more information.")

return ModalWeatherCode(model_id_attr=model_id_attr)(cubes)
return ModalWeatherCode(
model_id_attr=model_id_attr, record_run_attr=record_run_attr,
)(cubes)
17 changes: 17 additions & 0 deletions improver/utilities/cube_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,3 +199,20 @@ def spatial_coords_match(first_cube: Cube, second_cube: Cube) -> bool:
return first_cube.coord(axis="x") == second_cube.coord(
axis="x"
) and first_cube.coord(axis="y") == second_cube.coord(axis="y")


def is_model_blended(cube: Cube) -> bool:
    """
    Report whether a cube has passed through model blending, judged by the
    presence of a coordinate named "blend_time". A positive result does not
    guarantee that more than one model contributed to the blend, only that
    the cube has been through the model blending process.

    Args:
        cube:
            The cube to test.

    Returns:
        True if a "blend_time" coordinate is found on the cube, False
        otherwise.
    """
    coord_names = {coord.name() for coord in cube.coords()}
    return "blend_time" in coord_names
16 changes: 15 additions & 1 deletion improver/wxcode/modal_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
# POSSIBILITY OF SUCH DAMAGE.
"""Module containing a plugin to calculate the modal weather code in a period."""

from typing import Optional

import iris
import numpy as np
from iris.analysis import Aggregator
Expand All @@ -38,6 +40,7 @@
from scipy import stats

from improver import BasePlugin
from improver.blending.utilities import set_record_run_attr
from improver.utilities.cube_manipulation import MergeCubes

from ..metadata.forecast_times import forecast_period_coord
Expand Down Expand Up @@ -73,7 +76,9 @@ class ModalWeatherCode(BasePlugin):
covered by the input files.
"""

def __init__(self, model_id_attr: str = None):
def __init__(
self, model_id_attr: Optional[str] = None, record_run_attr: Optional[str] = None
):
"""
Set up plugin and create an aggregator instance for reuse

Expand All @@ -82,10 +87,14 @@ def __init__(self, model_id_attr: str = None):
Name of attribute recording source models that should be
inherited by the output cube. The source models are expected as
a space-separated string.
record_run_attr:
Name of attribute used to record models and cycles used in
constructing the weather symbols.
"""
self.aggregator_instance = Aggregator("mode", self.mode_aggregator)

self.model_id_attr = model_id_attr
self.record_run_attr = record_run_attr

# Create the expected cell method for use with single cube inputs
# that do not pass through the aggregator.
Expand Down Expand Up @@ -204,6 +213,11 @@ def process(self, cubes: CubeList) -> Cube:
A single weather code cube with time bounds that span those of
the input weather code cubes.
"""
# Set the record_run attribute on all cubes. This will survive the
# merge and be present on the output.
if self.record_run_attr:
set_record_run_attr(cubes, self.record_run_attr, self.model_id_attr)

cube = MergeCubes()(cubes)
self._unify_day_and_night(cube)

Expand Down
11 changes: 11 additions & 0 deletions improver/wxcode/weather_symbols.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
from numpy import ndarray

from improver import BasePlugin
from improver.blending.utilities import set_record_run_attr
from improver.metadata.amend import update_model_id_attr_attribute
from improver.metadata.probabilistic import (
find_threshold_coordinate,
Expand Down Expand Up @@ -98,6 +99,7 @@ def __init__(
self,
wxtree: dict,
model_id_attr: Optional[str] = None,
record_run_attr: Optional[str] = None,
target_period: Optional[int] = None,
) -> None:
"""
Expand All @@ -113,6 +115,9 @@ def __init__(
Name of attribute recording source models that should be
inherited by the output cube. The source models are expected as
a space-separated string.
record_run_attr:
Name of attribute used to record models and cycles used in
constructing the weather symbols.
target_period:
The period in seconds that the weather symbol being produced should
represent. This should correspond with any period diagnostics, e.g.
Expand All @@ -129,6 +134,7 @@ def __init__(
"""

self.model_id_attr = model_id_attr
self.record_run_attr = record_run_attr
self.start_node = list(wxtree.keys())[0]
self.target_period = target_period
self.queries = update_tree_thresholds(wxtree, target_period)
Expand Down Expand Up @@ -544,6 +550,11 @@ def create_symbol_cube(self, cubes: Union[List[Cube], CubeList]) -> Cube:
optional_attributes.update(
update_model_id_attr_attribute(cubes, self.model_id_attr)
)
if self.record_run_attr:
set_record_run_attr(cubes, self.record_run_attr, self.model_id_attr)
optional_attributes.update(
{self.record_run_attr: cubes[0].attributes[self.record_run_attr]}
)

symbols = create_new_diagnostic_cube(
"weather_code",
Expand Down
Loading