Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LAMA to Dask: fixes to Query & Data.digitize #427

Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion cf/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1724,6 +1724,18 @@ def digitize(
d._set_dask(dx)
d.override_units(_units_None, inplace=True)

# More elegant to handle 'delete_bins' in cf- rather than Dask- space
# i.e. using cf.where with d in-place rather than da.where with dx
# just after the digitize operation above (cf.where already applies
# equivalent logic element-wise).
if delete_bins:
for n, db in enumerate(delete_bins):
db -= n
d.where(d == db, np.ma.masked, None, inplace=True)
# x = d - 1 rather than = d here since there is one less bin
sadielbartholomew marked this conversation as resolved.
Show resolved Hide resolved
# therefore we need to adjust to the new corresponding indices
d.where(d > db, d - 1, None, inplace=True)

if return_bins:
if two_d_bins is None:
two_d_bins = np.empty((bins.size - 1, 2), dtype=bins.dtype)
Expand Down Expand Up @@ -9937,7 +9949,7 @@ def where(
# condition units are OK, and convert the condition to a
# boolean dask array with the same shape as the data.
condition = condition.copy()
condition = condition.set_condition_units(units)
condition.set_condition_units(units)
condition = condition.evaluate(d)

condition = type(self).asdata(condition)
Expand Down
7 changes: 6 additions & 1 deletion cf/query.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from collections.abc import Iterable
from copy import deepcopy
from operator import __and__ as operator_and
from operator import __or__ as operator_or
Expand Down Expand Up @@ -790,7 +791,11 @@ def set_condition_units(self, units):
value_units = getattr(value, "Units", None)
sadielbartholomew marked this conversation as resolved.
Show resolved Hide resolved
if value_units is None:
# Value has no units
value = Data(value, units=units)
if isinstance(value, Iterable): # may be a sequence of Data
Copy link
Collaborator

@davidhassell davidhassell Nov 20, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Edit Hi Sadie, I think what I wrote here may well be nonsense! Please ignore, and the code snippet and I'll have another think ...

Hi Sadie, not sure this will work in all cases:

  • Data objects are Iterable
  • When value is a sequence, we want it to remain a sequence, not convert it into a Data object, and Data.concatenate doesn't work for non-Data objects

This code might make sense, but it's not very pretty!

IGNORE


        value_units = getattr(value, "Units", None)
        if value_units is None:
            # Value has no units
            if isinstance(value, Iterable) and not isinstance(value, str):
                new = []
                for v in value:
                    value_units = getattr(v, "Units", None)
                    if value_units is None:
                        # Value has no units
                        v = Data(v, units=units)
                    else:
                        # Value already has units
                        try:
                            v.Units = units
                        except ValueError:
                            raise ValueError(
                                f"Units {units!r} are not equivalent to "
                                f"query condition units {value_units!r}"
                            )
                        
                    new.append(v)

                value = new
            else:
                value = Data(value, units=units)        
        else:
            # Value already has units
            try:
                value.Units = units
            except ValueError:
                raise ValueError(
                    f"Units {units!r} are not equivalent to "
                    f"query condition units {value_units!r}"
                )

Copy link
Collaborator

@davidhassell davidhassell Nov 22, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK - I think I worked out what's going on. The key is that the contents of a "set", "wi" or "wo" iterable are not necessarily Data objects (or rather objects with Units). How about this? There's scope for a bit of code re-use here, but I can't quite see how, yet.

        value_units = getattr(value, "Units", None)
        if value_units is None:
            # Value has no units
            if self.operator in ("wi", "wo", "set"):
                # value is a sequence of things that may or may not
                # already have units
                new = []
                for v in value:
                    v_units = getattr(v, "Units", None)
                    if v_units is None:
                        v = Data(v, units=units)
                    else:
                        try:
                            v = v.copy()
                            v.Units = units
                        except ValueError:
                            raise ValueError(
                                f"Units {units!r} are not equivalent to "
                                f"query condition units {v_units!r}"
                            )
                        
                    new.append(v)

                value = new
            else:
                value = Data(value, units=units)
        else:
            # Value already has units
            try:
                value = value.copy()
                value.Units = units
            except ValueError:
                raise ValueError(
                    f"Units {units!r} are not equivalent to "
                    f"query condition units {value_units!r}"
                )

        self._value = value

We could do with some more units test for this, too ...

Copy link
Member Author

@sadielbartholomew sadielbartholomew Nov 22, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've just added your code (putting you as a formal co-author since you wrote it, though I don't think I can specify you as 'full' author since I'm committing it!) and then consolidated it with an inner helper function. I'll add some new testing first thing tomorrow.

value = Data.concatenate(value)
value.Units = units
else:
value = Data(value, units=units)
else:
# Value already has units
try:
Expand Down
20 changes: 8 additions & 12 deletions cf/test/test_Data.py
Original file line number Diff line number Diff line change
Expand Up @@ -826,18 +826,14 @@ def test_Data_digitize(self):
(np.ma.getmask(e.array) == np.ma.getmask(b)).all()
)

# TODODASK: Reinstate the following test when
# __sub__, minimum, and maximum have
# been daskified

# e.where(
# cf.set([e.minimum(), e.maximum()]),
# cf.masked,
# e - 1,
# inplace=True,
# )
# f = d.digitize(bins, upper=upper)
# self.assertTrue(e.equals(f, verbose=2))
e.where(
cf.set([e.minimum(), e.maximum()]),
cf.masked,
e - 1,
inplace=True,
)
f = d.digitize(bins, upper=upper)
self.assertTrue(e.equals(f, verbose=2))

# Check returned bins
bins = [2, 6, 10, 50, 100]
Expand Down