Skip to content

Commit

Permalink
Merge pull request NCAS-CMS#702 from davidhassell/weighted-sum
Browse files Browse the repository at this point in the history
`cf.Field.collapse`: Fix "sum", "sum_of_weights" and "sum_of_weights2" when weighted
  • Loading branch information
davidhassell authored Oct 23, 2023
2 parents 70bec02 + 4188259 commit 1668e0b
Show file tree
Hide file tree
Showing 4 changed files with 169 additions and 82 deletions.
10 changes: 10 additions & 0 deletions Changelog.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
version 3.16.0
--------------

**2023-??-??**

* Fix bug that caused `cf.Field.collapse` to give incorrect results
for the "sum", "sum_of_weights" and "sum_of_weights2" methods, only
in the case that weights have been requested
(https://github.com/NCAS-CMS/cf-python/issues/701)

version 3.15.4
--------------

Expand Down
104 changes: 66 additions & 38 deletions cf/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,11 @@
"average",
"sd",
"standard_deviation",
"sum",
"var",
"variance",
# 'sum_of_weights',
# 'sum_of_weights2',
"sum_of_weights",
"sum_of_weights2",
"integral",
"root_mean_square",
)
Expand Down Expand Up @@ -5002,6 +5003,9 @@ def weights(
comp[key] = self._weights_scale(w, scale)

for w in comp.values():
if not measure:
w.override_units("1", inplace=True)

mn = w.minimum()
if mn <= 0:
raise ValueError(
Expand Down Expand Up @@ -6194,7 +6198,7 @@ def collapse(
group_span=None,
group_contiguous=1,
measure=False,
scale=1,
scale=None,
radius="earth",
great_circle=False,
verbose=None,
Expand Down Expand Up @@ -6719,6 +6723,10 @@ def collapse(
.. note:: By default *weights* is `None`, resulting in
**unweighted calculations**.

.. note:: Unless the *method* is ``'integral'``, the
units of the weights are not combined with
the field's units in the collapsed field.

If the alternative form of providing the collapse method
and axes combined as a CF cell methods-like string via the
*method* parameter has been used, then the *axes*
Expand Down Expand Up @@ -6762,57 +6770,58 @@ def collapse(
time you could set ``weights=('area', 'T')``.

measure: `bool`, optional
Create weights which are cell measures, i.e. which
describe actual cell sizes (e.g. cell area) with
appropriate units (e.g. metres squared). By default the
weights are normalised and have arbitrary units.
If True, and *weights* is not `None`, create weights
which are cell measures, i.e. which describe actual
cell sizes (e.g. cell area) with appropriate units
(e.g. metres squared). By default the weights units
are ignored.

Cell measures can be created for any combination of
axes. For example, cell measures for a time axis are the
time span for each cell with canonical units of seconds;
cell measures for the combination of four axes
representing time and three dimensional space could have
canonical units of metres cubed seconds.
axes. For example, cell measures for a time axis are
the time span for each cell with canonical units of
seconds; cell measures for the combination of four
axes representing time and three dimensional space
could have canonical units of metres cubed seconds.

When collapsing with the ``'integral'`` method, *measure*
must be True, and the units of the weights are
incorporated into the units of the returned field
When collapsing with the ``'integral'`` method,
*measure* must be True, and the units of the weights
are incorporated into the units of the returned field
construct.

.. note:: Specifying cell volume weights via
``weights=['X', 'Y', 'Z']`` or
``weights=['area', 'Z']`` (or other equivalents)
will produce **an incorrect result if the
vertical dimension coordinates do not define the
actual height or depth thickness of every cell
in the domain**. In this case,
``weights='volume'`` should be used instead,
which requires the field construct to have a
"volume" cell measure construct.
``weights=['area', 'Z']`` (or other
equivalents) will produce **an incorrect
result if the vertical dimension coordinates
do not define the actual height or depth
thickness of every cell in the domain**. In
this case, ``weights='volume'`` should be
used instead, which requires the field
construct to have a "volume" cell measure
construct.

If ``weights=True`` then care also needs to be
taken, as a "volume" cell measure construct will
be used if present, otherwise the cell volumes
will be calculated using the size of the
vertical coordinate cells.
If ``weights=True`` then care also needs to
be taken, as a "volume" cell measure
construct will be used if present, otherwise
the cell volumes will be calculated using
the size of the vertical coordinate cells.

.. versionadded:: 3.0.2

scale: number or `None`, optional
If set to a positive number then scale the weights so
that they are less than or equal to that number. If
set to `None` the weights are not scaled. In general
the default is for weights to be scaled to lie between
0 and 1; however if *measure* is True then the weights
are never scaled and the value of *scale* is taken as
`None`, regardless of its setting.
set to `None`, the default, then the weights are not
scaled.

*Parameter example:*
To scale all weights so that they lie between 0 and
10 ``scale=10``.
1 ``scale=1``.

.. versionadded:: 3.0.2

.. versionchanged:: 3.16.0 Default changed to `None`

radius: optional
Specify the radius used for calculating the areas of
cells defined in spherical polar coordinates. The
Expand Down Expand Up @@ -7698,6 +7707,12 @@ def collapse(
# ------------------------------------------------------------
# Parse the methods and axes
# ------------------------------------------------------------
if measure and scale is not None:
raise ValueError(
"'scale' must be None when 'measure' is True. "
f"Got: scale={scale!r}"
)

if ":" in method:
# Convert a cell methods string (such as 'area: mean dim3:
# dim2: max T: minimum height: variance') to a CellMethod
Expand Down Expand Up @@ -7916,8 +7931,15 @@ def collapse(
g_weights = None
else:
if measure:
# Never scale weights that are cell measures
scale = None
if method not in (
"integral",
"sum_of_weights",
"sum_of_weights2",
):
raise ValueError(
f"Can't set measure=True for {method!r} "
"collapses"
)
elif method == "integral":
raise ValueError(
f"Must set measure=True for {method!r} collapses"
Expand Down Expand Up @@ -8006,8 +8028,14 @@ def collapse(
d_kwargs = {}
if weights is not None:
if measure:
# Never scale weights that are cell measures
scale = None
if method not in (
"integral",
"sum_of_weights",
"sum_of_weights2",
):
raise ValueError(
f"Can't set measure=True for {method!r} collapses"
)
elif method == "integral":
raise ValueError(
f"Must set measure=True for {method!r} collapses"
Expand Down
6 changes: 3 additions & 3 deletions cf/test/test_Field.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,6 @@ def test_Field_collapse(self):

for axes in axes_combinations(f):
for method in (
"sum",
"min",
"max",
"minimum_absolute_value",
Expand All @@ -451,8 +450,6 @@ def test_Field_collapse(self):
"sample_size",
"sum_of_squares",
"median",
"sum_of_weights",
"sum_of_weights2",
):
for weights in (None, "area"):
a = f.collapse(method, axes=axes, weights=weights).data
Expand All @@ -462,10 +459,13 @@ def test_Field_collapse(self):
)

for method in (
"sum",
"mean",
"mean_absolute_value",
"mean_of_upper_decile",
"root_mean_square",
"sum_of_weights",
"sum_of_weights2",
):
for weights in (None, "area"):
if weights is not None:
Expand Down
Loading

0 comments on commit 1668e0b

Please sign in to comment.