From e450a407725914def8322d2753723318cf018a01 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Tue, 2 Aug 2022 02:38:58 +0100 Subject: [PATCH 01/13] Data.where: remove rogue assigment to fix NoneType errors --- cf/data/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cf/data/data.py b/cf/data/data.py index c9445a02d7..ac09a9c0f4 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -9937,7 +9937,7 @@ def where( # condition units are OK, and convert the condition to a # boolean dask array with the same shape as the data. condition = condition.copy() - condition = condition.set_condition_units(units) + condition.set_condition_units(units) condition = condition.evaluate(d) condition = type(self).asdata(condition) From f86923f927c3b5555740f09abb19be883f39ef30 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Tue, 2 Aug 2022 16:11:51 +0100 Subject: [PATCH 02/13] Reinstate test_Data_digitize test case now pre-reqs are satisfied --- cf/test/test_Data.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index b0fcd71d9d..473264a451 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -826,18 +826,14 @@ def test_Data_digitize(self): (np.ma.getmask(e.array) == np.ma.getmask(b)).all() ) - # TODODASK: Reinstate the following test when - # __sub__, minimum, and maximum have - # been daskified - - # e.where( - # cf.set([e.minimum(), e.maximum()]), - # cf.masked, - # e - 1, - # inplace=True, - # ) - # f = d.digitize(bins, upper=upper) - # self.assertTrue(e.equals(f, verbose=2)) + e.where( + cf.set([e.minimum(), e.maximum()]), + cf.masked, + e - 1, + inplace=True, + ) + f = d.digitize(bins, upper=upper) + self.assertTrue(e.equals(f, verbose=2)) # Check returned bins bins = [2, 6, 10, 50, 100] From 1aec6619861be948ec7bd9860e9409d2cd8e9642 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Tue, 2 Aug 2022 16:56:48 +0100 Subject: [PATCH 03/13] Query.set_condition_units: stop extra dim. being added if unitless --- cf/query.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cf/query.py b/cf/query.py index 67932f9f86..6d3eb99587 100644 --- a/cf/query.py +++ b/cf/query.py @@ -788,10 +788,7 @@ def set_condition_units(self, units): return value_units = getattr(value, "Units", None) - if value_units is None: - # Value has no units - value = Data(value, units=units) - else: + if value_units is not None: # Value already has units try: value.Units = units From 5bb57667f5af3cd69cf373c84d45e53459293291 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Wed, 3 Aug 2022 19:49:27 +0100 Subject: [PATCH 04/13] Data.digitize: add missing logic to process delete_bins list --- cf/data/data.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cf/data/data.py b/cf/data/data.py index ac09a9c0f4..f6eecf93a9 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -1720,7 +1720,14 @@ def digitize( # Digitise the array dx = d.to_dask_array() + dx = da.digitize(dx, bins, right=upper) + if delete_bins: + for n, db in enumerate(delete_bins): + db -= n + dx = np.ma.where(dx == db, np.ma.masked, dx) + dx = np.ma.where(dx > db, dx - 1, dx) + d._set_dask(dx) d.override_units(_units_None, inplace=True) From 3d432513d5d4f5ba1b45f1f67fb33d2a299898a9 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Wed, 10 Aug 2022 09:16:13 +0100 Subject: [PATCH 05/13] Data.digitize: move delete_bins processing from Dask- to cf- space --- cf/data/data.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/cf/data/data.py b/cf/data/data.py index f6eecf93a9..ab2745f370 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -1720,16 +1720,19 @@ def digitize( # Digitise the array dx = d.to_dask_array() - dx = da.digitize(dx, bins, right=upper) + d._set_dask(dx) + d.override_units(_units_None, inplace=True) + + # More elegant to handle 'delete_bins' in cf- rather than Dask- space + # i.e. using cf.where with d in-place rather than da.where with dx + # just after the digitize operation above (cf.where already applies + # equivalent logic element-wise). if delete_bins: for n, db in enumerate(delete_bins): db -= n - dx = np.ma.where(dx == db, np.ma.masked, dx) - dx = np.ma.where(dx > db, dx - 1, dx) - - d._set_dask(dx) - d.override_units(_units_None, inplace=True) + d.where(d == db, np.ma.masked, d, inplace=True) + d.where(d > db, d - 1, d, inplace=True) if return_bins: if two_d_bins is None: From d18a69a943e5282e2f3754f69f2fcb7cea109e80 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Wed, 10 Aug 2022 09:31:43 +0100 Subject: [PATCH 06/13] Data.digitize: clarify nature of (pre-existing) delete_bins logic --- cf/data/data.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cf/data/data.py b/cf/data/data.py index ab2745f370..10659ca7cb 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -1731,8 +1731,10 @@ def digitize( if delete_bins: for n, db in enumerate(delete_bins): db -= n - d.where(d == db, np.ma.masked, d, inplace=True) - d.where(d > db, d - 1, d, inplace=True) + d.where(d == db, np.ma.masked, None, inplace=True) + # x = d - 1 rather than = d here since there is one less bin + # therefore we need to adjust to the new corresponding indices + d.where(d > db, d - 1, None, inplace=True) if return_bins: if two_d_bins is None: From 39faa6cb35fe1c402a4ca9ec5faee9460a495add Mon Sep 17 00:00:00 2001 From: Sadie Louise Bartholomew Date: Fri, 18 Nov 2022 16:15:38 +0000 Subject: [PATCH 07/13] Revert "Query.set_condition_units: stop ... if unitless" This reverts commit 1aec6619861be948ec7bd9860e9409d2cd8e9642. --- cf/query.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cf/query.py b/cf/query.py index 6d3eb99587..67932f9f86 100644 --- a/cf/query.py +++ b/cf/query.py @@ -788,7 +788,10 @@ def set_condition_units(self, units): return value_units = getattr(value, "Units", None) - if value_units is not None: + if value_units is None: + # Value has no units + value = Data(value, units=units) + else: # Value already has units try: value.Units = units From 793e7ce74280d157ed13f9a65900c1d7830de8d8 Mon Sep 17 00:00:00 2001 From: Sadie Louise Bartholomew Date: Fri, 18 Nov 2022 18:16:35 +0000 Subject: [PATCH 08/13] Query.set_condition_units: handle sequences of Data to fix bug --- cf/query.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cf/query.py b/cf/query.py index 67932f9f86..0e7e7e43b0 100644 --- a/cf/query.py +++ b/cf/query.py @@ -1,4 +1,5 @@ import logging +from collections.abc import Iterable from copy import deepcopy from operator import __and__ as operator_and from operator import __or__ as operator_or @@ -790,7 +791,11 @@ def set_condition_units(self, units): value_units = getattr(value, "Units", None) if value_units is None: # Value has no units - value = Data(value, units=units) + if isinstance(value, Iterable): # may be a sequence of Data + value = Data.concatenate(value) + value.Units = units + else: + value = Data(value, units=units) else: # Value already has units try: From 0bf9b01c9c024690c7c3a2b473156d7542286939 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Tue, 22 Nov 2022 12:06:55 +0000 Subject: [PATCH 09/13] Update cf/data/data.py by fixing comment grammar Co-authored-by: David Hassell --- cf/data/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cf/data/data.py b/cf/data/data.py index 74d57bae40..ec9d64f75c 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -1795,7 +1795,7 @@ def digitize( for n, db in enumerate(delete_bins): db -= n d.where(d == db, np.ma.masked, None, inplace=True) - # x = d - 1 rather than = d here since there is one less bin + # x = d - 1 rather than = d here since there is one fewer bin # therefore we need to adjust to the new corresponding indices d.where(d > db, d - 1, None, inplace=True) From 999f82797a9d167d286b38f6d54ebb57e4b57ec1 Mon Sep 17 00:00:00 2001 From: Sadie Louise Bartholomew Date: Tue, 22 Nov 2022 18:45:01 +0000 Subject: [PATCH 10/13] Query.set_condition_units: apply fix to handle set & range inputs Co-authored-by: David Hassell --- cf/query.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/cf/query.py b/cf/query.py index cb7d0847a0..68ae24fbd3 100644 --- a/cf/query.py +++ b/cf/query.py @@ -874,14 +874,33 @@ def set_condition_units(self, units): value_units = getattr(value, "Units", None) if value_units is None: # Value has no units - if isinstance(value, Iterable): # may be a sequence of Data - value = Data.concatenate(value) - value.Units = units + if self.operator in ("wi", "wo", "set"): + # value is a sequence of things that may or may not + # already have units + new = [] + for v in value: + v_units = getattr(v, "Units", None) + if v_units is None: + v = Data(v, units=units) + else: + try: + v = v.copy() + v.Units = units + except ValueError: + raise ValueError( + f"Units {units!r} are not equivalent to " + f"query condition units {v_units!r}" + ) + + new.append(v) + + value = new else: value = Data(value, units=units) else: # Value already has units try: + value = value.copy() value.Units = units except ValueError: raise ValueError( @@ -891,6 +910,8 @@ def set_condition_units(self, units): self._value = value + self._value = value + # ---------------------------------------------------------------- # Deprecated attributes and methods # ---------------------------------------------------------------- From 33c9c21f841afa7c5519b13c68ebdb8da80dbee5 Mon Sep 17 00:00:00 2001 From: Sadie Louise Bartholomew Date: Tue, 22 Nov 2022 19:43:15 +0000 Subject: [PATCH 11/13] Query.set_condition_units: consolidate w/ inner helper function --- cf/query.py | 64 ++++++++++++++++++++++------------------------------- 1 file changed, 27 insertions(+), 37 deletions(-) diff --git a/cf/query.py b/cf/query.py index 68ae24fbd3..4c86653617 100644 --- a/cf/query.py +++ b/cf/query.py @@ -858,13 +858,29 @@ def set_condition_units(self, units): """ + def get_and_set_value_units(v, u): + """Helper method to simplify logic to set specified units.""" + v_units = getattr(v, "Units", None) + if v_units is None: # Value 'v' has no units + v = Data(v, units=u) + else: # Value 'v' already has units + try: + v = v.copy() + v.Units = u + except ValueError: + raise ValueError( + f"Units {u!r} are not equivalent to " + f"query condition units {v_units!r}" + ) + + return v + units = Units(units) compound = self._compound if compound: for r in compound: r.set_condition_units(units) - return value = self._value @@ -872,43 +888,17 @@ def set_condition_units(self, units): return value_units = getattr(value, "Units", None) - if value_units is None: - # Value has no units - if self.operator in ("wi", "wo", "set"): - # value is a sequence of things that may or may not - # already have units - new = [] - for v in value: - v_units = getattr(v, "Units", None) - if v_units is None: - v = Data(v, units=units) - else: - try: - v = v.copy() - v.Units = units - except ValueError: - raise ValueError( - f"Units {units!r} are not equivalent to " - f"query condition units {v_units!r}" - ) - - new.append(v) - - value = new - else: - value = Data(value, units=units) + if self.operator in ("wi", "wo", "set"): + # Value is a sequence of things that may or may not + # already have units + new_values = [] + for v in value: + v = get_and_set_value_units(v, units) + new_values.append(v) + + value = new_values else: - # Value already has units - try: - value = value.copy() - value.Units = units - except ValueError: - raise ValueError( - f"Units {units!r} are not equivalent to " - f"query condition units {value_units!r}" - ) - - self._value = value + value = get_and_set_value_units(value, units) self._value = value From 4ef9c09eb1979d6315fe0f77286b772a874a0ddc Mon Sep 17 00:00:00 2001 From: Sadie Louise Bartholomew Date: Tue, 22 Nov 2022 19:50:54 +0000 Subject: [PATCH 12/13] Query.set_condition_units: remove now-superfluous import --- cf/query.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cf/query.py b/cf/query.py index 4c86653617..1ee03abe52 100644 --- a/cf/query.py +++ b/cf/query.py @@ -1,5 +1,4 @@ import logging -from collections.abc import Iterable from copy import deepcopy from operator import __and__ as operator_and from operator import __or__ as operator_or From cb7d1e86d4619b114b1871fa2f6f1a2b38bbaaf3 Mon Sep 17 00:00:00 2001 From: "Sadie L. Bartholomew" Date: Tue, 6 Dec 2022 15:56:53 +0000 Subject: [PATCH 13/13] Address feedback by applying linting fixes for linter checks pass --- cf/query.py | 4 ++-- cf/read_write/netcdf/netcdfread.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cf/query.py b/cf/query.py index 1ee03abe52..70f81efd97 100644 --- a/cf/query.py +++ b/cf/query.py @@ -857,8 +857,9 @@ def set_condition_units(self, units): """ + def get_and_set_value_units(v, u): - """Helper method to simplify logic to set specified units.""" + """Helper method to simplify setting of specified units.""" v_units = getattr(v, "Units", None) if v_units is None: # Value 'v' has no units v = Data(v, units=u) @@ -886,7 +887,6 @@ def get_and_set_value_units(v, u): if value is None: return - value_units = getattr(value, "Units", None) if self.operator in ("wi", "wo", "set"): # Value is a sequence of things that may or may not # already have units diff --git a/cf/read_write/netcdf/netcdfread.py b/cf/read_write/netcdf/netcdfread.py index 276ca40d13..1e3f178ec2 100644 --- a/cf/read_write/netcdf/netcdfread.py +++ b/cf/read_write/netcdf/netcdfread.py @@ -549,4 +549,4 @@ def _create_cfanetcdfarray( # instance array = self.implementation.initialise_CFANetCDFArray(**kwargs) - return array, kwargs \ No newline at end of file + return array, kwargs