
Commit ce72e44

Merge branch 'feature_branch_nbhood_refactor' into mobt_157_nbhood_refactor_consolidate_unit_tests_rebased

* feature_branch_nbhood_refactor:
  Mobt 157 nbhood refactor consolidate unit tests part1 (metoppv#1665)
  Adds a filter to the combine CLI for mismatching realizations (metoppv#1656)
  Reduce the memory requirements for read-the-docs (metoppv#1672)
  Further doc-building fixes. (metoppv#1671)
  DOC Fix intersphinx links for docs (metoppv#1668)
fionaRust committed Feb 16, 2022
2 parents e7b0b41 + 28faf19 commit ce72e44
Showing 16 changed files with 871 additions and 80 deletions.
15 changes: 9 additions & 6 deletions .readthedocs.yml
@@ -1,9 +1,12 @@
-conda:
-  file: doc/rtd_environment.yml
+version: 2
 
 build:
-  image: latest
+  os: "ubuntu-20.04"
+  tools:
+    python: "mambaforge-4.10"
+
+conda:
+  environment: doc/rtd_environment.yml
 
-python:
-  version: 3.6
-  system_packages: true
+formats:
+  - htmlzip
8 changes: 4 additions & 4 deletions doc/source/conf.py
@@ -394,12 +394,12 @@
 
 # Example configuration for intersphinx: refer to the Python standard library.
 intersphinx_mapping = {
-    "https://docs.python.org/": None,
+    "https://docs.python.org/3/": None,
     "https://scitools-iris.readthedocs.io/en/latest/": None,
     "https://scitools.org.uk/cartopy/docs/latest/": None,
-    "https://scitools.org.uk/cf-units/docs/latest/": None,
-    "https://docs.scipy.org/doc/numpy/": None,
-    "https://docs.scipy.org/doc/scipy/reference/": None,
+    "https://cf-units.readthedocs.io/en/stable/": None,
+    "https://numpy.org/doc/stable/": None,
+    "https://docs.scipy.org/doc/scipy-1.6.2/reference/": None,
     "https://pandas.pydata.org/pandas-docs/dev/": None,
 }
32 changes: 16 additions & 16 deletions improver/cli/combine.py
@@ -37,7 +37,11 @@
 @cli.clizefy
 @cli.with_output
 def process(
-    *cubes: cli.inputcube, operation="+", new_name=None, broadcast_to_threshold=False,
+    *cubes: cli.inputcube,
+    operation="+",
+    new_name=None,
+    broadcast_to_threshold=False,
+    minimum_realizations=None,
 ):
     r"""Combine input cubes.
@@ -58,26 +62,22 @@ def process(
         broadcast_to_threshold (bool):
             If True, broadcast input cubes to the threshold coord prior to combining -
             a threshold coord must already exist on the first input cube.
+        minimum_realizations (int):
+            If specified, the input cubes will be filtered to ensure that only
+            realizations that include all available lead times are combined. If
+            fewer realizations than this integer meet this criterion, an error
+            will be raised.
 
     Returns:
         result (iris.cube.Cube):
             Returns a cube with the combined data.
     """
     from iris.cube import CubeList
 
-    from improver.cube_combiner import CubeCombiner, CubeMultiplier
+    from improver.cube_combiner import Combine
 
-    if not cubes:
-        raise TypeError("A cube is needed to be combined.")
-    if new_name is None:
-        new_name = cubes[0].name()
-
-    if operation == "*" or operation == "multiply":
-        result = CubeMultiplier()(
-            CubeList(cubes), new_name, broadcast_to_threshold=broadcast_to_threshold,
-        )
-
-    else:
-        result = CubeCombiner(operation)(CubeList(cubes), new_name)
-
-    return result
+    return Combine(
+        operation,
+        broadcast_to_threshold=broadcast_to_threshold,
+        minimum_realizations=minimum_realizations,
+        new_name=new_name,
+    )(CubeList(cubes))
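
The CLI body now collapses into a single call to the new Combine plugin. As a rough sketch of the equivalent direct usage (the input file paths here are hypothetical; the flag spelling follows the acceptance test further down):

import iris
from iris.cube import CubeList

from improver.cube_combiner import Combine

# Hypothetical inputs; any NetCDF files the combine CLI accepts would do.
cubes = [iris.load_cube(path) for path in ("input1.nc", "input2.nc", "input3.nc")]

# Approximately what `improver combine --operation max --minimum-realizations 3
# input1.nc input2.nc input3.nc --output out.nc` performs internally:
result = Combine(
    "max",
    broadcast_to_threshold=False,
    minimum_realizations=3,
    new_name=None,  # None means the first cube's name is reused
)(CubeList(cubes))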
118 changes: 107 additions & 11 deletions improver/cube_combiner.py
@@ -49,9 +49,104 @@
 from improver.utilities.cube_manipulation import (
     enforce_coordinate_ordering,
     expand_bounds,
+    filter_realizations,
 )
 
 
+class Combine(BasePlugin):
+    """Combine input cubes.
+
+    Combine the input cubes into a single cube using the requested operation.
+    The first cube in the input list provides the template for output metadata.
+    If coordinates are expanded as a result of this combine operation
+    (e.g. expanding time for accumulations / max in period) the upper bound of
+    the new coordinate will also be used as the point for the new coordinate.
+    """
+
+    def __init__(
+        self,
+        operation: str,
+        broadcast_to_threshold: bool = False,
+        minimum_realizations: Union[str, int, None] = None,
+        new_name: str = None,
+    ):
+        r"""
+        Args:
+            operation (str):
+                An operation to use in combining input cubes. One of:
+                +, -, \*, add, subtract, multiply, min, max, mean
+            broadcast_to_threshold (bool):
+                If True, broadcast input cubes to the threshold coord prior to
+                combining - a threshold coord must already exist on the first
+                input cube.
+            minimum_realizations (int):
+                If specified, the input cubes will be filtered to ensure that
+                only realizations that include all available lead times are
+                combined. If fewer realizations than this integer meet this
+                criterion, an error will be raised. Minimum value is 1.
+            new_name (str):
+                New name for the resulting dataset.
+        """
+        try:
+            self.minimum_realizations = int(minimum_realizations)
+        except TypeError:
+            if minimum_realizations is not None:
+                raise
+            self.minimum_realizations = None
+        self.new_name = new_name
+        self.broadcast_to_threshold = broadcast_to_threshold
+
+        if operation == "*" or operation == "multiply":
+            self.plugin = CubeMultiplier(
+                broadcast_to_threshold=self.broadcast_to_threshold
+            )
+        else:
+            self.plugin = CubeCombiner(operation)
+
+    def process(self, cubes: CubeList) -> Cube:
+        """
+        Preprocesses the cubes, then passes them to the appropriate plugin.
+
+        Args:
+            cubes (iris.cube.CubeList or list of iris.cube.Cube):
+                An iris CubeList to be combined.
+
+        Returns:
+            result (iris.cube.Cube):
+                Returns a cube with the combined data.
+
+        Raises:
+            TypeError:
+                If the input list of cubes is empty.
+            ValueError:
+                If minimum_realizations is not met, or fewer than one was
+                requested.
+        """
+        if not cubes:
+            raise TypeError("A cube is needed to be combined.")
+        if self.new_name is None:
+            self.new_name = cubes[0].name()
+
+        if self.minimum_realizations is None:
+            filtered_cubes = cubes
+        else:
+            if self.minimum_realizations < 1:
+                raise ValueError(
+                    f"Minimum realizations must be at least 1, not {self.minimum_realizations}"
+                )
+
+            cube = filter_realizations(cubes)
+            realization_count = len(cube.coord("realization").points)
+            if realization_count < self.minimum_realizations:
+                raise ValueError(
+                    f"After filtering, number of realizations {realization_count} "
+                    "is less than the minimum number of realizations allowed "
+                    f"({self.minimum_realizations})"
+                )
+            filtered_cubes = cube.slices_over("time")
+
+        return self.plugin(CubeList(filtered_cubes), self.new_name)
+
+
 class CubeCombiner(BasePlugin):
     """Plugin for combining cubes using linear operators"""
 
@@ -222,8 +317,15 @@ class CubeMultiplier(CubeCombiner):
     """
 
-    def __init__(self) -> None:
-        """Create a CubeMultiplier plugin"""
+    def __init__(self, broadcast_to_threshold: bool = False) -> None:
+        """Create a CubeMultiplier plugin
+
+        Args:
+            broadcast_to_threshold:
+                True if the first cube has a threshold coordinate to which the
+                following cube(s) need(s) to be broadcast prior to combining data.
+        """
+        self.broadcast_to_threshold = broadcast_to_threshold
         self.operator = np.multiply
         self.normalise = False
 
@@ -337,10 +439,7 @@ def _update_cell_methods(
         return new_cell_methods
 
     def process(
-        self,
-        cube_list: Union[List[Cube], CubeList],
-        new_diagnostic_name: str,
-        broadcast_to_threshold: bool = False,
+        self, cube_list: Union[List[Cube], CubeList], new_diagnostic_name: str
     ) -> Cube:
         """
         Multiply data from a list of input cubes into a single cube. The first
@@ -353,9 +452,6 @@ def process(
             New name for the combined diagnostic. This should be the diagnostic
             name, eg rainfall_rate or rainfall_rate_in_vicinity, rather than the
             name of the probabilistic output cube.
-            broadcast_to_threshold:
-                True if the first cube has a threshold coordinate to which the
-                following cube(s) need(s) to be broadcast prior to combining data.
 
         Returns:
             Cube containing the combined data.
@@ -368,7 +464,7 @@ def process(
             msg = "Expecting 2 or more cubes in cube_list"
             raise ValueError(msg)
 
-        if broadcast_to_threshold:
+        if self.broadcast_to_threshold:
             cube_list = self._setup_coords_for_broadcast(cube_list)
 
         self._check_dimensions_match(
@@ -383,7 +479,7 @@
 
         probabilistic_name = cube_list[0].name()
 
-        if broadcast_to_threshold:
+        if self.broadcast_to_threshold:
             diagnostic_name = get_diagnostic_cube_name_from_probability_name(
                 probabilistic_name
             )
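
A small sketch of how the Combine constructor shown above coerces minimum_realizations (the CLI hands it over as a string). The behaviour below is inferred from the constructor and process method in this diff:

from improver.cube_combiner import Combine

# int() coercion accepts ints and numeric strings; None passes through untouched.
assert Combine("max", minimum_realizations="4").minimum_realizations == 4
assert Combine("max", minimum_realizations=4).minimum_realizations == 4
assert Combine("max").minimum_realizations is None

# A non-numeric string raises ValueError from int() at construction time,
# since only TypeError (i.e. None) is caught:
#   Combine("max", minimum_realizations="many")
# Zero constructs fine, but calling the plugin on cubes would raise:
#   ValueError: Minimum realizations must be at least 1, not 0
plugin = Combine("max", minimum_realizations=0)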
8 changes: 6 additions & 2 deletions improver/nbhood/square_kernel.py
@@ -106,13 +106,17 @@ def __init__(
             msg = "{} is not a valid neighbourhood_method.".format(neighbourhood_method)
             raise ValueError(msg)
         if weighted_mode and neighbourhood_method != "circular":
-            msg = "weighted_mode can only be used if neighbourhood_method is circular"
+            msg = (
+                "weighted_mode can only be used if neighbourhood_method is circular."
+                f" weighted_mode provided: {weighted_mode}, "
+                f"neighbourhood_method provided: {neighbourhood_method}."
+            )
             raise ValueError(msg)
         self.weighted_mode = weighted_mode
         self.sum_only = sum_only
         self.re_mask = re_mask
 
-    def _calculate_neighbourhood(self, data: ndarray, mask: ndarray = None) -> ndarray:
+    def _calculate_neighbourhood(self, data: ndarray, mask: ndarray) -> ndarray:
         """
         Apply neighbourhood processing.
30 changes: 0 additions & 30 deletions improver/utilities/__init__.py
@@ -1,30 +0,0 @@
-# -*- coding: utf-8 -*-
-# -----------------------------------------------------------------------------
-# (C) British Crown Copyright 2017-2021 Met Office.
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# * Redistributions of source code must retain the above copyright notice, this
-#   list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above copyright notice,
-#   this list of conditions and the following disclaimer in the documentation
-#   and/or other materials provided with the distribution.
-#
-# * Neither the name of the copyright holder nor the names of its
-#   contributors may be used to endorse or promote products derived from
-#   this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
28 changes: 28 additions & 0 deletions improver/utilities/cube_manipulation.py
@@ -617,3 +617,31 @@ def expand_bounds(
         result_coord.points = result_coord.points.astype(FLOAT_DTYPE)
 
     return result_cube
+
+
+def filter_realizations(cubes: CubeList) -> Cube:
+    """For a given list of cubes, identifies the set of times, filters out any
+    realizations that are not present at all times and returns a merged cube of
+    the result.
+
+    Args:
+        cubes:
+            List of cubes to be filtered
+
+    Returns:
+        Cube:
+            Filtered and merged cube
+    """
+    times = set()
+    realizations = set()
+    for cube in cubes:
+        times.update([c.point for c in cube.coord("time").cells()])
+        realizations.update(cube.coord("realization").points)
+    filtered_cubes = CubeList()
+    for realization in realizations:
+        realization_cube = cubes.extract(
+            iris.Constraint(realization=realization)
+        ).merge_cube()
+        if set([c.point for c in realization_cube.coord("time").cells()]) == times:
+            filtered_cubes.append(realization_cube)
+    return filtered_cubes.merge_cube()
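
Since filter_realizations is new, here is a minimal, self-contained sketch of its behaviour using toy cubes (all names, values, and the helper below are hypothetical, not from the repository): three hourly inputs in which realization 2 is missing from the final hour, so it is dropped before the merge.

import numpy as np
from iris.cube import Cube, CubeList
from iris.coords import DimCoord

from improver.utilities.cube_manipulation import filter_realizations


def toy_cube(hour, realizations):
    """Hypothetical helper: a (realization, y) cube at a single validity time."""
    realization = DimCoord(
        np.array(realizations, dtype=np.int32), standard_name="realization"
    )
    y = DimCoord(np.arange(2, dtype=np.float32), long_name="y_index")
    cube = Cube(
        np.ones((len(realizations), 2), dtype=np.float32),
        long_name="air_temperature",
        units="K",
        dim_coords_and_dims=[(realization, 0), (y, 1)],
    )
    # Scalar validity time, so per-realization extracts can merge along time.
    cube.add_aux_coord(
        DimCoord(
            np.array([hour * 3600], dtype=np.int64),
            standard_name="time",
            units="seconds since 2022-01-28 00:00:00",
        )
    )
    return cube


cubes = CubeList(
    [toy_cube(19, [0, 1, 2]), toy_cube(20, [0, 1, 2]), toy_cube(21, [0, 1])]
)
merged = filter_realizations(cubes)
print(merged.coord("realization").points)  # [0 1]: realization 2 lacks hour 21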
4 changes: 4 additions & 0 deletions improver_tests/acceptance/SHA256SUMS
@@ -94,6 +94,10 @@
 b5ca2030a23ba6440c712a659f6f4f3dcbe8566afeef1a68e2e5a6a9b6e7484e ./combine/broa
 88b1b0d76a4d1d90a59766d8ad166a9e001556f951201022976ab3fa4e45c1c8 ./combine/cellmethods/kgo.nc
 7e4e3da97b7725de34d1e56fbdcb837cc2fc50eaddb29e3e8452ab72aff8e56b ./combine/cellmethods/precipitation_accumulation-PT01H.nc
 45893b7a7c10936f33cac4899a01c6f2c5ee2b5abe20fa400144558d3b9a15b3 ./combine/cellmethods/precipitation_is_snow.nc
+a4b991d8e0fa174cec415efcf936b38e67fb936c57f7a9e2794beb6629b9b824 ./combine/minimum_realizations/20220128T1900Z-PT0010H00M-temperature_at_screen_level_max-PT01H.nc
+4bca51d7208294112240b10e3a36692491472dd3f7ebb072ca030e76a495f14f ./combine/minimum_realizations/20220128T2000Z-PT0011H00M-temperature_at_screen_level_max-PT01H.nc
+41daf1da910869fbd9338e31a530aaa68a221828d6e6b8cdbb4302a8e65fc223 ./combine/minimum_realizations/20220128T2100Z-PT0012H00M-temperature_at_screen_level_max-PT01H.nc
+b60f6046c86319f8b7ca3b5d7902dbaf3a52f571f30ba56a1a4bc814c42dd341 ./combine/minimum_realizations/kgo.nc
 0bd96af6cb5c6caa045e397589dd0ce3b498af837d989fe73326f5e9459c6054 ./construct-reliability-tables/basic/forecast_0.nc
 fbc14286b4ce41e2e60df0870ae4911c1b00a38ec96912f43c6187fcaf7d02f6 ./construct-reliability-tables/basic/forecast_1.nc
 902e5cb9d3dc5d2b78bb99aff8370f9815adf5064b2caeb7abed73a56a897a43 ./construct-reliability-tables/basic/kgo_single_value_bins.nc
28 changes: 28 additions & 0 deletions improver_tests/acceptance/test_combine.py
@@ -72,6 +72,34 @@ def test_minmax_temperatures(tmp_path, minmax):
     acc.compare(output_path, kgo_path)
 
 
+@pytest.mark.parametrize("realizations, gives_error", ((3, False), (4, True)))
+def test_minimum_realizations(tmp_path, realizations, gives_error):
+    """Test combining with the minimum-realizations filter"""
+    kgo_dir = acc.kgo_root() / "combine/minimum_realizations"
+    kgo_path = kgo_dir / "kgo.nc"
+    temperatures = sorted(kgo_dir.glob("*temperature_at_screen_level*.nc"))
+    output_path = tmp_path / "output.nc"
+    args = [
+        "--operation",
+        "max",
+        "--minimum-realizations",
+        f"{realizations}",
+        *temperatures,
+        "--output",
+        f"{output_path}",
+    ]
+    if gives_error:
+        with pytest.raises(
+            ValueError,
+            match="After filtering, number of realizations 3 is less than the minimum number "
+            rf"of realizations allowed \({realizations}\)",
+        ):
+            run_cli(args)
+    else:
+        run_cli(args)
+        acc.compare(output_path, kgo_path)
+
+
 def test_combine_accumulation(tmp_path):
     """Test combining precipitation accumulations"""
     kgo_dir = acc.kgo_root() / "combine/accum"