From 720fe7de0fbdcddde638466fbec48d018fb7f366 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Thu, 9 May 2024 16:17:32 +0100 Subject: [PATCH 01/22] Add suport for anemoi-datasets --- src/earthkit/data/core/fieldlist.py | 21 +++- src/earthkit/data/core/index.py | 12 ++- src/earthkit/data/core/order.py | 83 ++++++++++++---- src/earthkit/data/core/temporary.py | 39 +++++++- src/earthkit/data/indexing/fieldlist.py | 27 +++++ src/earthkit/data/utils/availability.py | 63 +++++++++--- src/earthkit/data/utils/bbox.py | 20 ++-- src/earthkit/data/utils/factorise.py | 127 ++++++++++++++++++++++-- src/earthkit/data/utils/humanize.py | 33 +++++- src/earthkit/data/utils/patterns.py | 31 +++++- 10 files changed, 401 insertions(+), 55 deletions(-) create mode 100644 src/earthkit/data/indexing/fieldlist.py diff --git a/src/earthkit/data/core/fieldlist.py b/src/earthkit/data/core/fieldlist.py index b6a799e3..e293ffa5 100644 --- a/src/earthkit/data/core/fieldlist.py +++ b/src/earthkit/data/core/fieldlist.py @@ -12,7 +12,7 @@ from collections import defaultdict from earthkit.data.core import Base -from earthkit.data.core.index import Index +from earthkit.data.core.index import Index, MaskIndex, MultiIndex from earthkit.data.decorators import cached_method, detect_out_filename from earthkit.data.utils.array import ensure_backend, numpy_backend from earthkit.data.utils.metadata import metadata_argument @@ -1416,3 +1416,22 @@ def to_fieldlist(self, array_backend=None, **kwargs): def _to_array_fieldlist(self, **kwargs): md = [f.metadata() for f in self] return self.from_array(self.to_array(**kwargs), md) + + @classmethod + def new_mask_index(self, *args, **kwargs): + return MaskFieldList(*args, **kwargs) + + @classmethod + def merge(cls, sources): + assert all(isinstance(_, FieldList) for _ in sources) + return MultiFieldList(sources) + + +class MaskFieldList(FieldList, MaskIndex): + def __init__(self, *args, **kwargs): + MaskIndex.__init__(self, *args, **kwargs) + + +class MultiFieldList(FieldList, MultiIndex): + def __init__(self, *args, **kwargs): + MultiIndex.__init__(self, *args, **kwargs) diff --git a/src/earthkit/data/core/index.py b/src/earthkit/data/core/index.py index f0634491..a7323253 100644 --- a/src/earthkit/data/core/index.py +++ b/src/earthkit/data/core/index.py @@ -104,6 +104,7 @@ def build_actions(self, kwargs): raise NotImplementedError() def compare_elements(self, a, b): + assert callable(self.remapping), (type(self.remapping), self.remapping) a_metadata = self.remapping(a.metadata) b_metadata = self.remapping(b.metadata) for k, v in self.actions.items(): @@ -120,18 +121,18 @@ def build_actions(self, kwargs): def ascending(a, b): if a == b: return 0 - if a > b: + if b is None or a > b: return 1 - if a < b: + if a is None or a < b: return -1 raise ValueError(f"{a},{b}") def descending(a, b): if a == b: return 0 - if a > b: + if b is None or a > b: return -1 - if a < b: + if a is None or a < b: return 1 raise ValueError(f"{a},{b}") @@ -169,6 +170,9 @@ def get(self, x): order[int(key)] = i except ValueError: pass + except TypeError: + print('Cannot convert "%s" to int (%s)' % (key, type(key))) + raise try: order[float(key)] = i except ValueError: diff --git a/src/earthkit/data/core/order.py b/src/earthkit/data/core/order.py index 6bc69ef8..17e8906b 100644 --- a/src/earthkit/data/core/order.py +++ b/src/earthkit/data/core/order.py @@ -13,21 +13,25 @@ LOG = logging.getLogger(__name__) -class Remapping: +class Remapping(dict): + # inherit from dict to make it serialisable + def __init__(self, remapping): - self.remapping = {} + super().__init__(remapping) + self.lists = {} for k, v in remapping.items(): - m = re.split(r"\{([^}]*)\}", v) - self.remapping[k] = m + if isinstance(v, str): + v = re.split(r"\{([^}]*)\}", v) + self.lists[k] = v def __call__(self, func): - if self.remapping is None or not self.remapping: + if not self: return func class CustomJoiner: - def format_name(self, x, **kwargs): - return func(x, **kwargs) + def format_name(self, x): + return func(x) def format_string(self, x): return str(x) @@ -37,17 +41,20 @@ def join(self, args): joiner = CustomJoiner() - def wrapped(name, **kwargs): - return self.substitute(name, joiner, **kwargs) + def wrapped(name): + return self.substitute(name, joiner) return wrapped - def substitute(self, name, joiner, **kwargs): - if name in self.remapping: + def substitute(self, name, joiner): + if name in self.lists: + if callable(self.lists[name]): + return self.lists[name]() + lst = [] - for i, bit in enumerate(self.remapping[name]): + for i, bit in enumerate(self.lists[name]): if i % 2: - p = joiner.format_name(bit, **kwargs) + p = joiner.format_name(bit) if p is not None: lst.append(p) else: @@ -55,22 +62,64 @@ def substitute(self, name, joiner, **kwargs): else: lst.append(joiner.format_string(bit)) return joiner.join(lst) - return joiner.format_name(name, **kwargs) + return joiner.format_name(name) def as_dict(self): - return self.remapping + return dict(self) -def build_remapping(mapping): +def _build_remapping(mapping): if mapping is None: return Remapping({}) - if isinstance(mapping, dict): + if not isinstance(mapping, Remapping) and isinstance(mapping, dict): return Remapping(mapping) return mapping +class Patch(dict): + # inherit from dict to make it serialisable + + def __init__(self, proc, name, patch): + self.proc = proc + self.name = name + + if isinstance(patch, dict): + self.patch = lambda x: patch.get(x, x) + elif isinstance(patch, (int, bool, float, str)) or patch is None: + self.patch = lambda x: patch + else: + assert callable(patch) + self.patch = patch + + # For JSON, we simply forward to the remapping + super().__init__(proc.as_dict()) + + def __call__(self, func): + next = self.proc(func) + + def wrapped(name): + result = next(name) + if name == self.name: + result = self.patch(result) + return result + + return wrapped + # assert False, (name, self.proc, self.name, self.patch) + + def as_dict(self): + return dict(self) + + +def build_remapping(mapping, patches=None): + result = _build_remapping(mapping) + if patches: + for k, v in patches.items(): + result = Patch(result, k, v) + return result + + def normalize_order_by(*args, **kwargs): _kwargs = {} for a in args: diff --git a/src/earthkit/data/core/temporary.py b/src/earthkit/data/core/temporary.py index 44d0c102..8d92389a 100644 --- a/src/earthkit/data/core/temporary.py +++ b/src/earthkit/data/core/temporary.py @@ -35,7 +35,7 @@ def __exit__(self, *args, **kwargs): self.cleanup() def cleanup(self): - if self.path is not None: + if self.path is not None and os is not None: os.unlink(self.path) self.path = None @@ -73,3 +73,40 @@ def path(self): def temp_directory(dir=None): return TmpDirectory(dir=dir) + + +class TmpEnv: + """ + A context manager that temporarily sets environment variables. + + Usage: + >>> import os + >>> with TmpEnv(CLIMETLAB_TESTS_FOO="123", CLIMETLAB_TESTS_BAR="456"): + ... print(os.environ["CLIMETLAB_TESTS_FOO"]) + ... print(os.environ["CLIMETLAB_TESTS_BAR"]) + ... + 123 + 456 + >>> print(os.environ.get("CLIMETLAB_TESTS_FOO")) + None + """ + + def __init__(self, **kwargs): + self.kwargs = kwargs + self.previous = {} + + def __enter__(self): + for key, value in self.kwargs.items(): + self.previous[key] = os.environ.get(key) + os.environ[key] = str(value) + + def __exit__(self, type, value, traceback): + for key in self.kwargs.keys(): + if self.previous.get(key) is not None: + os.environ[key] = self.previous[key] + else: + del os.environ[key] + + +def temp_env(**kwargs): + return TmpEnv(**kwargs) diff --git a/src/earthkit/data/indexing/fieldlist.py b/src/earthkit/data/indexing/fieldlist.py new file mode 100644 index 00000000..ac494a4c --- /dev/null +++ b/src/earthkit/data/indexing/fieldlist.py @@ -0,0 +1,27 @@ +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + +from earthkit.data.core.fieldlist import FieldList + + +class FieldArray(FieldList): + def __init__(self, fields=None): + self.fields = fields if fields is not None else [] + + def append(self, field): + self.fields.append(field) + + def _getitem(self, n): + return self.fields[n] + + def __len__(self): + return len(self.fields) + + def __repr__(self) -> str: + return f"FieldArray({len(self.fields)})" diff --git a/src/earthkit/data/utils/availability.py b/src/earthkit/data/utils/availability.py index 287c38d0..78a202d2 100644 --- a/src/earthkit/data/utils/availability.py +++ b/src/earthkit/data/utils/availability.py @@ -13,9 +13,11 @@ import itertools import json import os +import pickle import yaml +from earthkit.data.utils import load_json_or_yaml from earthkit.data.utils.factorise import Tree, factorise from .humanize import dict_to_human, list_to_human @@ -47,27 +49,56 @@ def load_yaml(avail): return yaml.load(f, Loader=yaml.SafeLoader) -CONFIG_LOADERS = { - ".json": load_json, - ".yaml": load_yaml, -} - - class Availability: def __init__(self, avail, intervals=None, parser=None): + CONFIG_LOADERS = { + "json": load_json, + "yaml": load_yaml, + # "marslist": Availability.from_mars_list, + "pickle": Availability.from_pickle, + } if not isinstance(avail, Tree): if isinstance(avail, str): config_loader = load_json if len(avail) > 5: - config_loader = CONFIG_LOADERS.get(avail[-5:], load_str) + ext = avail[-10:].split(".")[-1] + config_loader = CONFIG_LOADERS.get(ext, load_str) avail = config_loader(avail) if parser is not None: avail = [parser(item) for item in avail] - avail = factorise(avail, intervals=intervals) + if isinstance(avail, Availability): + avail = avail._tree + + if not isinstance(avail, Tree): + avail = factorise(avail, intervals=intervals) self._tree = avail + def as_mars_list(self, *args, **kwargs): + return self._tree.as_mars_list(*args, **kwargs) + + def as_mars(self, *args, **kwargs): + return self._tree.as_mars(*args, **kwargs) + + def to_pickle(self, filename): + with open(filename, "wb") as f: + pickle.dump(self._tree, f) + + @classmethod + def from_pickle(cls, filename): + with open(filename, "rb") as f: + tree = pickle.load(f) + return cls(tree) + + def to_yaml(self): + return yaml.dump(self._tree) + + @classmethod + def from_yaml(cls, filename): + dic = load_json_or_yaml(filename) + return cls(dic) + @classmethod def from_mars_list(cls, tree, intervals=None): if os.path.exists(tree): @@ -84,18 +115,18 @@ def as_dict(s): requests = [] stack = [] - last = 0 + last = -1 for line in input: line = line.rstrip() cnt = 0 while len(line) > 0 and line[0] == " ": line = line[1:] cnt += 1 - if cnt <= last and stack: + if cnt <= last: requests.append(as_dict(",".join(stack))) - while len(stack) <= cnt: - stack.append(None) - stack[cnt] = line + while len(stack) > cnt: + stack.pop() + stack.append(line) last = cnt if stack: @@ -106,6 +137,12 @@ def as_dict(s): def _repr_html_(self): return "
{}

".format(self.tree()) + def __str__(self): + return self.tree() + + def __repr__(self): + return str(self.tree()) + def select(self, *args, **kwargs): return Availability(self._tree.select(*args, **kwargs)) diff --git a/src/earthkit/data/utils/bbox.py b/src/earthkit/data/utils/bbox.py index 1166bd92..2ae5743b 100644 --- a/src/earthkit/data/utils/bbox.py +++ b/src/earthkit/data/utils/bbox.py @@ -21,7 +21,7 @@ def _normalize(lon, minimum): class BoundingBox: - r"""Represents a geographic bounding box. + r"""Represent a geographic bounding box. Parameters ---------- @@ -35,7 +35,7 @@ class BoundingBox: Eastern longitude (degrees) """ - def __init__(self, *, north, west, south, east): + def __init__(self, *, north, west, south, east, check=True): # Convert to float as these values may come from Numpy self.north = min(float(north), 90.0) self.south = max(float(south), -90.0) @@ -48,17 +48,17 @@ def __init__(self, *, north, west, south, east): else: self.east = _normalize(float(east), self.west) - if self.north < self.south: + if self.north < self.south and check: raise ValueError( f"Invalid bounding box, north={self.north} < south={self.south}" ) - if self.west > self.east: + if self.west > self.east and check: raise ValueError( f"Invalid bounding box, west={self.west} > east={self.east}" ) - if self.east > self.west + 360: + if self.east > self.west + 360 and check: raise ValueError( f"Invalid bounding box, east={self.east} > west={self.west}+360" ) @@ -252,12 +252,18 @@ def from_geopandas(cls, gdf): return BoundingBox.make_invalid() -def bounding_box(obj): +def bounding_box(obj, check=True): if isinstance(obj, BoundingBox): return obj if isinstance(obj, (list, tuple)): - return BoundingBox(north=obj[0], west=obj[1], south=obj[2], east=obj[3]) + return BoundingBox( + north=obj[0], + west=obj[1], + south=obj[2], + east=obj[3], + check=check, + ) obj = get_wrapper(obj) diff --git a/src/earthkit/data/utils/factorise.py b/src/earthkit/data/utils/factorise.py index aeb89a9b..6731fb42 100644 --- a/src/earthkit/data/utils/factorise.py +++ b/src/earthkit/data/utils/factorise.py @@ -397,6 +397,97 @@ def compact(self): def factorise(self): return _factorise(list(self._flatten_tree()), intervals=self._intervals) + def as_mars(self, verb="retrieve", extra=None): + result = [] + for r in self.flatten(): + req = [verb] + if extra is not None: + req.append(extra) + for k, v in r.items(): + v = [str(_) for _ in v] + req.append(f"{k}={'/'.join(v)}") + result.append(",".join(req)) + return "\n".join(result) + + def as_mars_list(self): + text = [] + indent = {} + order = {} + + def V(request, depth): + if not request: + return + + if depth not in indent: + indent[depth] = len(indent) + + text.append(" " * indent[depth]) + + for k in sorted(request.keys()): + if k not in order: + order[k] = len(order) + + sep = "" + for k, v in sorted(request.items(), key=lambda x: order[x[0]]): + text.append(sep) + text.append(k) + text.append("=") + + if isinstance(v[0], Interval): + v = [str(x) for x in v] + + if len(v) == 1: + text.append(v[0]) + else: + start, end = self._to_date_interval(k, v) + if start is not None: + text.append(f"{start}/to/{end}") + else: + text.append("/".join([str(_) for _ in sorted(v)])) + sep = "," + text.append("\n") + + self.visit(V) + + return "".join(str(x) for x in text) + + def _to_date_interval(self, k, v): + class ReturnNoneNone(Exception): + pass + + def parse_date(d): + try: + return datetime.datetime.strptime(d, "%Y%m%d") + except: # noqa: E722 + raise ReturnNoneNone() + + try: + if k != "date": + raise ReturnNoneNone() + + if len(k) < 3: + raise ReturnNoneNone() + + start = parse_date(str(v[0])) + step = parse_date(str(v[1])) - start + + if step != datetime.timedelta(days=1): + raise ReturnNoneNone() + + for i in range(2, len(v)): + current = parse_date(str(v[i])) + previous = parse_date(str(v[i - 1])) + if current - previous != step: + print(int(v[i])) + print( + f"expecting {previous + step} after {previous}, found {current}" + ) + raise ReturnNoneNone() + return str(v[0]), str(v[-1]) + + except ReturnNoneNone: + return None, None + def tree(self): text = [] indent = {} @@ -427,9 +518,13 @@ def V(request, depth): if len(v) == 1: text.append(v[0]) else: - text.append("[") - text.append(", ".join(sorted(str(x) for x in v))) - text.append("]") + start, end = self._to_date_interval(k, v) + if start is not None: + text.append(f"{start}/to/{end}") + else: + text.append("[") + text.append(", ".join([str(_) for _ in sorted(v)])) + text.append("]") sep = ", " text.append("\n") @@ -552,6 +647,25 @@ def sort_columns(self): self.colidx.sort(key=lambda a: self.cols[a]) + def compare_values(self, sa, sb): + if isinstance(sa, tuple) and isinstance(sb, tuple): + for a, b in zip(sa, sb): + n = self.compare_values(a, b) + if n != 0: + return n + return 0 + + if type(sa) is type(sb): + if sa < sb: + return -1 + + if sa > sb: + return 1 + + return 0 + + return self.compare_values(str(type(sa)), str(type(sb))) + def compare_rows(self, a, b): for idx in self.colidx: sa = self.cols[idx].value(a) @@ -560,11 +674,10 @@ def compare_rows(self, a, b): if sa is None and sb is None: continue - if sa < sb: - return -1 + n = self.compare_values(sa, sb) - if sa > sb: - return 1 + if n != 0: + return n return 0 diff --git a/src/earthkit/data/utils/humanize.py b/src/earthkit/data/utils/humanize.py index 8fc1847e..148b7bee 100644 --- a/src/earthkit/data/utils/humanize.py +++ b/src/earthkit/data/utils/humanize.py @@ -13,7 +13,34 @@ def bytes(n): - u = ["", " KiB", " MiB", " GiB", " TiB", " PiB", "EiB", "ZiB", "YiB"] + """ + >>> bytes(4096) + '4 KiB' + >>> bytes(4000) + '3.9 KiB' + """ + if n < 0: + sign = "-" + n -= 0 + else: + sign = "" + + u = ["", " KiB", " MiB", " GiB", " TiB", " PiB", " EiB", " ZiB", " YiB"] + i = 0 + while n >= 1024: + n /= 1024.0 + i += 1 + return "%s%g%s" % (sign, int(n * 10 + 0.5) / 10.0, u[i]) + + +def base2(n): + """ + >>> base2(4096) + '4K' + >>> base2(4000) + '3.9K' + """ + u = ["", "K", "M", "G", "T", " P", "E", "Z", "Y"] i = 0 while n >= 1024: n /= 1024.0 @@ -254,14 +281,14 @@ def dict_to_human(query): return list_to_human(lst) -def list_to_human(lst): +def list_to_human(lst, conjunction="and"): if not lst: return "??" if len(lst) > 2: lst = [", ".join(lst[:-1]), lst[-1]] - return " and ".join(lst) + return f" {conjunction} ".join(lst) def as_number(value, name, units, none_ok): diff --git a/src/earthkit/data/utils/patterns.py b/src/earthkit/data/utils/patterns.py index da8b3f07..bddaec80 100644 --- a/src/earthkit/data/utils/patterns.py +++ b/src/earthkit/data/utils/patterns.py @@ -86,7 +86,14 @@ def substitute(self, value, name): return self.format % value -TYPES = {"": Any, "int": Int, "float": Float, "date": Datetime, "enum": Enum} +TYPES = { + "": Any, + "int": Int, + "float": Float, + "date": Datetime, + "strftime": Datetime, + "enum": Enum, +} class Constant: @@ -115,6 +122,23 @@ def substitute(self, params): return self.kind.substitute(params[self.name], self.name) +FUNCTIONS = dict(lower=lambda s: s.lower()) + + +class Function: + def __init__(self, value): + functions = value.split("|") + self.name = functions[0] + self.variable = Variable(functions[0]) + self.functions = functions[1:] + + def substitute(self, params): + value = self.variable.substitute(params) + for f in self.functions: + value = FUNCTIONS[f](value) + return value + + class Pattern: def __init__(self, pattern, ignore_missing_keys=False): self.ignore_missing_keys = ignore_missing_keys @@ -125,7 +149,10 @@ def __init__(self, pattern, ignore_missing_keys=False): if i % 2 == 0: self.pattern.append(Constant(p)) else: - v = Variable(p) + if "|" in p: + v = Function(p) + else: + v = Variable(p) self.variables.append(v) self.pattern.append(v) From 7bc07727ee50681fd1dc0f3a09b20d577b00b4f0 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Thu, 9 May 2024 17:22:17 +0100 Subject: [PATCH 02/22] Add suport for anemoi-datasets --- src/earthkit/data/core/__init__.py | 12 +-- src/earthkit/data/sources/constants.py | 144 +++++++++++++++++-------- 2 files changed, 107 insertions(+), 49 deletions(-) diff --git a/src/earthkit/data/core/__init__.py b/src/earthkit/data/core/__init__.py index 8bb8337b..81927e73 100644 --- a/src/earthkit/data/core/__init__.py +++ b/src/earthkit/data/core/__init__.py @@ -98,7 +98,7 @@ def order_by(self, *args, **kwargs): """Reorder the elements of the object.""" self._not_implemented() - def unique_values(self, *coords, remapping=None, progress_bar=True): + def unique_values(self, *coords, remapping=None, patches=None, progress_bar=True): """ Given a list of metadata attributes, such as date, param, levels, returns the list of unique values for each attributes @@ -109,7 +109,7 @@ def unique_values(self, *coords, remapping=None, progress_bar=True): assert len(coords) assert all(isinstance(k, str) for k in coords), coords - remapping = build_remapping(remapping) + remapping = build_remapping(remapping, patches) iterable = self if progress_bar: @@ -118,16 +118,16 @@ def unique_values(self, *coords, remapping=None, progress_bar=True): desc=f"Finding coords in dataset for {coords}", ) - dic = defaultdict(dict) + vals = defaultdict(dict) for f in iterable: metadata = remapping(f.metadata) for k in coords: v = metadata(k) - dic[k][v] = True + vals[k][v] = True - dic = {k: tuple(values.keys()) for k, values in dic.items()} + vals = {k: tuple(values.keys()) for k, values in vals.items()} - return dic + return vals # @abstractmethod # def to_points(self, *args, **kwargs): diff --git a/src/earthkit/data/sources/constants.py b/src/earthkit/data/sources/constants.py index 724cfad2..9fc1289b 100644 --- a/src/earthkit/data/sources/constants.py +++ b/src/earthkit/data/sources/constants.py @@ -42,9 +42,8 @@ def _valid_datetime(self): class ConstantMaker: - def __init__(self, source_or_dataset): - self.source_or_dataset = source_or_dataset - self.field = source_or_dataset[0] + def __init__(self, field): + self.field = field self.shape = self.field.shape @cached_method @@ -184,22 +183,59 @@ def cos_solar_zenith_angle(self, date): ) return result.flatten() + def __getattr__(self, name): + if "+" not in name and "-" not in name: + # If we are here, we are looking for a method that does not exist, + # it has to be a method with a time delta. + raise AttributeError(name) + if "+" in name: + fname, delta = name.split("+") + sign = 1 + if "-" in name: + fname, delta = name.split("-") + sign = -1 + method = getattr(self, fname) + + if delta.endswith("h"): + factor = 60 + elif delta.endswith("d"): + factor = 24 * 60 + else: + raise ValueError(f"Invalid time delta {delta} in {name}") + + delta = delta[:-1] + delta = int(delta) + delta = datetime.timedelta(minutes=delta) * factor * sign + + def wrapper(date): + date = date + delta + value = method(date) + return value + + return wrapper + class ConstantField(Field): - def __init__(self, date, param, proc, shape, geometry, backend): + def __init__(self, maker, date, param, proc, number=None, array_backend=None): + self.maker = maker self.date = date self.param = param self.proc = proc - self._shape = shape - self._geometry = geometry + self.number = number + # self._shape = shape + # self._geometry = self.maker.field.metadata().geography d = dict( valid_datetime=date if isinstance(date, str) else date.isoformat(), param=param, level=None, levelist=None, - number=None, + number=number, + levtype=None, + ) + super().__init__( + array_backend, + metadata=ConstantMetadata(d, self.maker.field.metadata().geography), ) - super().__init__(backend, metadata=ConstantMetadata(d, geometry)) def _make_metadata(self): pass @@ -210,15 +246,8 @@ def _values(self, dtype=None): values = values.astype(dtype) return values - @property - def shape(self): - return self._shape - def __repr__(self): - return "ConstantField(%s,%s)" % ( - self.param, - self.date, - ) + return "ConstantField(%s,%s,%s)" % (self.param, self.date, self.number) def make_datetime(date, time): @@ -258,34 +287,64 @@ def index_to_coords(index: int, shape): class ConstantsFieldListCore(FieldList): - def __init__(self, source_or_dataset, request={}, repeat=1, **kwargs): + def __init__(self, source_or_dataset, request={}, **kwargs): request = dict(**request) request.update(kwargs) self.request = self._request(**request) - if "date" in self.request: - self.dates = [ - make_datetime(date, time) - for date, time in itertools.product( - self.request["date"], self.request.get("time", [None]) - ) - ] - assert len(set(self.dates)) == len( - self.dates - ), "Duplicates dates in constants." - else: - self.dates = source_or_dataset.unique_values("valid_datetime")[ - "valid_datetime" - ] + def find_numbers(source_or_dataset): + if "number" in self.request: + return self.request["number"] + + assert hasattr(source_or_dataset, "unique_values"), ( + f"{source_or_dataset} (type '{type(source_or_dataset).__name__}') is" + " not a proper source or dataset" + ) + + return source_or_dataset.unique_values( + "number", patches={"number": {None: 0}} + )["number"] + + def find_dates(source_or_dataset): + if "date" not in self.request and "time" in self.request: + raise ValueError("Cannot specify time without date") + + if "date" in self.request and "time" not in self.request: + return self.request["date"] + + if "date" in self.request and "time" in self.request: + dates = [ + make_datetime(date, time) + for date, time in itertools.product( + self.request["date"], self.request["time"] + ) + ] + assert len(set(dates)) == len(dates), "Duplicates dates in constants." + return dates + + assert "date" not in self.request and "time" not in self.request + assert hasattr(source_or_dataset, "unique_values"), ( + f"{source_or_dataset} (type '{type(source_or_dataset).__name__}') is" + " not a proper source or dataset" + ) + + return source_or_dataset.unique_values("valid_datetime")["valid_datetime"] + + self.dates = find_dates(source_or_dataset) self.params = self.request["param"] - if not isinstance(self.params, list): + if not isinstance(self.params, (tuple, list)): self.params = [self.params] - self.repeat = repeat # For ensembles - self.maker = ConstantMaker(source_or_dataset) + + # self.numbers = self.request.get("number", [None]) + self.numbers = find_numbers(source_or_dataset) + if not isinstance(self.numbers, (tuple, list)): + self.numbers = [self.numbers] + + self.maker = ConstantMaker(field=source_or_dataset[0]) self.procs = {param: getattr(self.maker, param) for param in self.params} - self._len = len(self.dates) * len(self.params) * self.repeat + self._len = len(self.dates) * len(self.params) * len(self.numbers) super().__init__(**kwargs) @@ -312,23 +371,22 @@ def _getitem(self, n): if n >= self._len or n < 0: raise IndexError(n) - date, param, repeat = index_to_coords( - n, (len(self.dates), len(self.params), self.repeat) + date, param, number = index_to_coords( + n, (len(self.dates), len(self.params), len(self.numbers)) ) - assert repeat == 0, "Not implemented" - date = self.dates[date] # assert isinstance(date, datetime.datetime), (date, type(date)) - param = self.params[param] + number = self.numbers[number] + return ConstantField( + self.maker, date, param, self.procs[param], - self.maker.shape, - self.maker.field.metadata().geography, - self.array_backend, + number=number, + array_backend=self.array_backend, ) From e80e8b9a0e8f0d5c5ad8c3498f281398919c0055 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Fri, 10 May 2024 16:35:58 +0100 Subject: [PATCH 03/22] Add suport for anemoi-datasets --- src/earthkit/data/core/__init__.py | 2 +- src/earthkit/data/core/fieldlist.py | 13 + src/earthkit/data/core/geography.py | 4 + src/earthkit/data/core/index.py | 34 +-- src/earthkit/data/core/order.py | 18 +- src/earthkit/data/indexing/cube.py | 270 +++++++++++++++++++++ src/earthkit/data/readers/grib/metadata.py | 20 ++ src/earthkit/data/readers/netcdf/field.py | 4 + src/earthkit/data/sources/empty.py | 5 +- tests/constants/test_constants_slice.py | 16 +- tests/grib/test_grib_slice.py | 16 +- tests/netcdf/test_netcdf_slice.py | 16 +- 12 files changed, 385 insertions(+), 33 deletions(-) create mode 100644 src/earthkit/data/indexing/cube.py diff --git a/src/earthkit/data/core/__init__.py b/src/earthkit/data/core/__init__.py index 81927e73..5a114f54 100644 --- a/src/earthkit/data/core/__init__.py +++ b/src/earthkit/data/core/__init__.py @@ -122,7 +122,7 @@ def unique_values(self, *coords, remapping=None, patches=None, progress_bar=True for f in iterable: metadata = remapping(f.metadata) for k in coords: - v = metadata(k) + v = metadata(k, default=None) vals[k][v] = True vals = {k: tuple(values.keys()) for k, values in vals.items()} diff --git a/src/earthkit/data/core/fieldlist.py b/src/earthkit/data/core/fieldlist.py index e293ffa5..284bdf56 100644 --- a/src/earthkit/data/core/fieldlist.py +++ b/src/earthkit/data/core/fieldlist.py @@ -341,6 +341,14 @@ def to_latlon(self, flatten=False, dtype=None): lon, lat = self.data(("lon", "lat"), flatten=flatten, dtype=dtype) return dict(lat=lat, lon=lon) + def grid_points(self): + r = self.to_latlon(flatten=True) + return r["lat"], r["lon"] + + @property + def resolution(self): + return self._metadata.geography.resolution() + @property def shape(self): r"""tuple: Get the shape of the field. @@ -1417,6 +1425,11 @@ def _to_array_fieldlist(self, **kwargs): md = [f.metadata() for f in self] return self.from_array(self.to_array(**kwargs), md) + def cube(self, *args, **kwargs): + from earthkit.data.indexing.cube import FieldCube + + return FieldCube(self, *args, **kwargs) + @classmethod def new_mask_index(self, *args, **kwargs): return MaskFieldList(*args, **kwargs) diff --git a/src/earthkit/data/core/geography.py b/src/earthkit/data/core/geography.py index ee02e21a..0df0b03b 100644 --- a/src/earthkit/data/core/geography.py +++ b/src/earthkit/data/core/geography.py @@ -127,3 +127,7 @@ def gridspec(self): :class:`~data.core.gridspec.GridSpec>` """ pass + + @abstractmethod + def resolution(self): + pass diff --git a/src/earthkit/data/core/index.py b/src/earthkit/data/core/index.py index a7323253..9d2823ef 100644 --- a/src/earthkit/data/core/index.py +++ b/src/earthkit/data/core/index.py @@ -395,7 +395,7 @@ def isel(self, *args, **kwargs): return self.sel(**kwargs) - def order_by(self, *args, remapping=None, **kwargs): + def order_by(self, *args, remapping=None, patches=None, **kwargs): """Changes the order of the elements in a fieldlist-like object. Parameters @@ -492,7 +492,7 @@ def order_by(self, *args, remapping=None, **kwargs): kwargs = normalize_order_by(*args, **kwargs) kwargs = self._normalize_kwargs_names(**kwargs) - remapping = build_remapping(remapping) + remapping = build_remapping(remapping, patches) if not kwargs: return self @@ -511,35 +511,39 @@ def cmp(i, j): def __getitem__(self, n): if isinstance(n, slice): - return self.from_slice(n) + return self._from_slice(n) if isinstance(n, (tuple, list)): - return self.from_multi(n) + return self._from_sequence(n) if isinstance(n, dict): - return self.from_dict(n) + return self._from_dict(n) else: import numpy as np if isinstance(n, np.ndarray): - return self.from_multi(n) + return self._from_ndarray(n) return self._getitem(n) - def from_slice(self, s): + def _from_slice(self, s): indices = range(len(self))[s] return self.new_mask_index(self, indices) - def from_mask(self, lst): + def _from_mask(self, lst): indices = [i for i, x in enumerate(lst) if x] return self.new_mask_index(self, indices) - def from_multi(self, a): - import numpy as np + def _from_sequence(self, s): + return self.new_mask_index(self, s) - # will raise IndexError if an index is out of bounds - n = len(self) - indices = np.arange(0, n if n > 0 else 0) - indices = indices[a].tolist() - return self.new_mask_index(self, indices) + def _from_ndarray(self, a): + return self._from_sequence(a.tolist()) + # import numpy as np + + # # will raise IndexError if an index is out of bounds + # n = len(self) + # indices = np.arange(0, n if n > 0 else 0) + # indices = indices[a].tolist() + # return self.new_mask_index(self, indices) def from_dict(self, dic): return self.sel(dic) diff --git a/src/earthkit/data/core/order.py b/src/earthkit/data/core/order.py index 17e8906b..509eaf52 100644 --- a/src/earthkit/data/core/order.py +++ b/src/earthkit/data/core/order.py @@ -30,8 +30,8 @@ def __call__(self, func): return func class CustomJoiner: - def format_name(self, x): - return func(x) + def format_name(self, x, **kwargs): + return func(x, **kwargs) def format_string(self, x): return str(x) @@ -41,12 +41,12 @@ def join(self, args): joiner = CustomJoiner() - def wrapped(name): - return self.substitute(name, joiner) + def wrapped(name, **kwargs): + return self.substitute(name, joiner, **kwargs) return wrapped - def substitute(self, name, joiner): + def substitute(self, name, joiner, **kwargs): if name in self.lists: if callable(self.lists[name]): return self.lists[name]() @@ -54,7 +54,7 @@ def substitute(self, name, joiner): lst = [] for i, bit in enumerate(self.lists[name]): if i % 2: - p = joiner.format_name(bit) + p = joiner.format_name(bit, **kwargs) if p is not None: lst.append(p) else: @@ -62,7 +62,7 @@ def substitute(self, name, joiner): else: lst.append(joiner.format_string(bit)) return joiner.join(lst) - return joiner.format_name(name) + return joiner.format_name(name, **kwargs) def as_dict(self): return dict(self) @@ -99,8 +99,8 @@ def __init__(self, proc, name, patch): def __call__(self, func): next = self.proc(func) - def wrapped(name): - result = next(name) + def wrapped(name, **kwargs): + result = next(name, **kwargs) if name == self.name: result = self.patch(result) return result diff --git a/src/earthkit/data/indexing/cube.py b/src/earthkit/data/indexing/cube.py new file mode 100644 index 00000000..f895c2f5 --- /dev/null +++ b/src/earthkit/data/indexing/cube.py @@ -0,0 +1,270 @@ +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + +import itertools +import logging +import math +import re + +LOG = logging.getLogger(__name__) + + +def coords_to_index(coords, shape) -> int: + a = 0 + n = 1 + i = len(coords) - 1 + while i >= 0: + a += coords[i] * n + n *= shape[i] + i -= 1 + return a + + +def index_to_coords(index: int, shape): + assert isinstance(index, int), (index, type(index)) + + result = [None] * len(shape) + i = len(shape) - 1 + + while i >= 0: + result[i] = index % shape[i] + index = index // shape[i] + i -= 1 + + result = tuple(result) + + assert len(result) == len(shape) + return result + + +class FieldCube: + def __init__( + self, + ds, + *args, + remapping=None, + patches=None, + flatten_values=False, + ): + assert len(ds), f"No data in {ds}" + + self.flatten_values = flatten_values + + if len(args) == 1 and isinstance(args[0], (list, tuple)): + args = args[0] + + names = [] + for a in args: + if isinstance(a, str): + names.append(a) + elif isinstance(a, dict): + names += list(a.keys()) + + self._field_shape = None + + # Sort the source according to their + # internal_args = reduce(operator.add, [Separator.split(a) for a in args], []) + # for i in ds: + # print(i) + # print("before") + # print(ds[0]) + # print(ds[1]) + # print(ds[2]) + # print(ds[3]) + self.source = ds.order_by(*args, remapping=remapping, patches=patches) + del ds + # print("after") + # print(self.source[0]) + # print(self.source[1]) + # print(self.source[2]) + # print(self.source[3]) + + # Get a mapping of user names to unique values + # With possible reduce dimentionality if the user use 'level+param' + self.user_coords = self.source.unique_values( + *names, remapping=remapping, patches=patches + ) + + self.user_shape = tuple(len(v) for k, v in self.user_coords.items()) + + if math.prod(self.user_shape) != len(self.source): + details = [] + for key, v in self.user_coords.items(): + details.append(f"{key=} ({len(v)}) {v}") + assert not isinstance( + self.source, str + ), f"Not expecting a str here ({self.source})" + for i, f in enumerate(self.source): + details.append(f"{i}={f} {f.metadata('number')}") + if i > 30: + details.append("...") + break + + msg = ( + f"Shape {self.user_shape} [{math.prod(self.user_shape):,}]" + + f" does not match number of available fields {len(self.source):,}. " + + f"Difference: {len(self.source)-math.prod(self.user_shape):,}\n" + + "\n".join(details) + ) + raise ValueError(msg) + + @property + def field_shape(self): + if self._field_shape is None: + self._field_shape = self.source[0].shape + + if self.flatten_values: + self._field_shape = (math.prod(self._field_shape),) + + assert isinstance(self._field_shape, tuple), ( + self._field_shape, + self.source[0], + ) + return self._field_shape + + @property + def extended_user_shape(self): + return self.user_shape + self.field_shape + + def __str__(self): + content = ", ".join([f"{k}:{len(v)}" for k, v in self.user_coords.items()]) + return f"{self.__class__.__name__}({content} ({len(self.source)} fields))" + + def squeeze(self): + # TODO: remove an dimensions of one. + return self + + def __getitem__(self, indexes): + # Make sure the requested indexes are a list of slices matching the shape + + if isinstance(indexes, int): + indexes = (indexes,) # make tuple + + if isinstance(indexes, list): + indexes = tuple(indexes) + + assert isinstance(indexes, tuple), (type(indexes), indexes) + + indexes = list(indexes) + + if indexes[-1] is Ellipsis: + indexes.pop() + + while len(indexes) < len(self.user_shape): + indexes.append(slice(None, None, None)) + + # Map the slices to a list of indexes per dimension + coords = [] + for s, c in zip(indexes, self.user_coords.values()): + lst = list(range(len(c)))[s] + if not isinstance(lst, list): + lst = [lst] + coords.append(lst) + + # Transform the coordinates to a list of indexes for the underlying dataset + dataset_indexes = [] + user_shape = self.user_shape + for x in itertools.product(*coords): + i = coords_to_index(x, user_shape) + assert isinstance(i, int), i + dataset_indexes.append(i) + + print("dataset_indexes", dataset_indexes, "tuple", tuple(dataset_indexes)) + + ds = self.source[tuple(dataset_indexes)] + + # If we match just one element, we return it + if all(len(_) == 1 for _ in coords): + return ds[0] + + # For more than one element, we return a new cube + return FieldCube( + ds, + *self.user_coords, + flatten_values=self.flatten_values, + ) + + def to_numpy(self, **kwargs): + return self.source.to_numpy(**kwargs).reshape(*self.extended_user_shape) + + def _names(self, coords, reading_chunks=None, **kwargs): + if reading_chunks is None: + reading_chunks = list(coords.keys()) + if isinstance(reading_chunks, (list, tuple)): + assert all(isinstance(_, str) for _ in reading_chunks) + reading_chunks = {k: len(coords[k]) for k in reading_chunks} + + for k, requested in reading_chunks.items(): + full_len = len(coords[k]) + assert full_len == requested, "only full chunks supported for now" + + names = list(coords[a] for a, _ in reading_chunks.items()) + + return names + + def count(self, reading_chunks=None): + names = self._names(reading_chunks=reading_chunks, coords=self.user_coords) + return math.prod(len(lst) for lst in names) + + def iterate_cubelets(self, reading_chunks=None, **kwargs): + names = self._names(reading_chunks=reading_chunks, coords=self.user_coords) + indexes = list(range(0, len(lst)) for lst in names) + + return ( + Cubelet(self, i, coords_names=n) + for n, i in zip(itertools.product(*names), itertools.product(*indexes)) + ) + + def chunking(self, chunks): + if isinstance(chunks, (str, int)): + m = re.match(r"(\d+)\s*(.*)?", str(chunks)) + if not m: + raise ValueError(f"Cannot parse {chunks}") + size = int(m.group(1)) + unit = m.group(2).lower() + if unit == "": + unit = "m" + size *= dict(k=1024, m=1024 * 1024, g=1024 * 1024 * 1024)[unit[0]] + + # TODO: use dtype. + # We assuming float32 = 4 bytes per + + rest = math.prod(self.extended_user_shape[1:]) * 4 + first = int(size / rest + 0.5) + return (first,) + self.extended_user_shape[1:] + + if not chunks: + return True # Let ZARR choose + + lst = list(self.user_shape) + for i, name in enumerate(self.user_coords): + lst[i] = chunks.get(name, lst[i]) + + return tuple(lst) + + +class Cubelet: + def __init__(self, cube, coords, coords_names=None): + self._coords_names = coords_names # only for display purposes + self.owner = cube + assert all(isinstance(_, int) for _ in coords), coords + self.coords = coords + self.flatten_values = cube.flatten_values + + def __repr__(self): + return ( + f"{self.__class__.__name__}({self.coords},index_names={self._coords_names})" + ) + + @property + def extended_icoords(self): + return self.coords + + def to_numpy(self, **kwargs): + return self.owner[self.coords].to_numpy(flatten=self.flatten_values, **kwargs) diff --git a/src/earthkit/data/readers/grib/metadata.py b/src/earthkit/data/readers/grib/metadata.py index f92eec6f..d12de256 100644 --- a/src/earthkit/data/readers/grib/metadata.py +++ b/src/earthkit/data/readers/grib/metadata.py @@ -138,6 +138,26 @@ def bounding_box(self): def gridspec(self): return make_gridspec(self.metadata) + def resolution(self): + grid_type = self.metadata.get("gridType") + + if grid_type in ("reduced_gg", "reduced_rotated_gg"): + return self.metadata.get("gridName") + + if grid_type == "regular_ll": + x = self.metadata.get("DxInDegrees") + y = self.metadata.get("DyInDegrees") + assert x == y, (x, y) + return x + + if grid_type == "lambert": + x = self.metadata.get("DxInMetres") + y = self.metadata.get("DyInMetres") + assert x == y, (x, y) + return str(x / 1000).replace(".", "p") + "km" + + raise ValueError(f"Unknown gridType={grid_type}") + class GribMetadata(Metadata): """Represent the metadata of a GRIB field. diff --git a/src/earthkit/data/readers/netcdf/field.py b/src/earthkit/data/readers/netcdf/field.py index 5e7093f7..ff17c041 100644 --- a/src/earthkit/data/readers/netcdf/field.py +++ b/src/earthkit/data/readers/netcdf/field.py @@ -68,6 +68,10 @@ def _grid_mapping(self): def gridspec(self): raise NotImplementedError("gridspec is not implemented for netcdf/xarray") + def resolution(self): + # TODO: implement resolution + return None + class XArrayMetadata(RawMetadata): LS_KEYS = ["variable", "level", "valid_datetime", "units"] diff --git a/src/earthkit/data/sources/empty.py b/src/earthkit/data/sources/empty.py index 6c139a44..35c787a2 100644 --- a/src/earthkit/data/sources/empty.py +++ b/src/earthkit/data/sources/empty.py @@ -7,10 +7,11 @@ # nor does it submit to any jurisdiction. # -from . import Source +from earthkit.data.core.fieldlist import FieldList -class EmptySource(Source): + +class EmptySource(FieldList): def ignore(self): # Used by multi-source return True diff --git a/tests/constants/test_constants_slice.py b/tests/constants/test_constants_slice.py index 4732dc6f..97e7d968 100644 --- a/tests/constants/test_constants_slice.py +++ b/tests/constants/test_constants_slice.py @@ -73,7 +73,11 @@ def test_constants_slice(indexes): @pytest.mark.parametrize( "indexes1,indexes2", - [(np.array([1, 16, 5, 9]), np.array([1, 3])), ([1, 16, 5, 9], [1, 3])], + [ + (np.array([1, 16, 5, 9]), np.array([1, 3])), + ([1, 16, 5, 9], [1, 3]), + ((1, 16, 5, 9), (1, 3)), + ], ) def test_constants_array_indexing(indexes1, indexes2): ds, md = load_constants_fs() @@ -93,7 +97,15 @@ def test_constants_array_indexing(indexes1, indexes2): assert r1.metadata(["valid_datetime", "param"]) == ref_md -@pytest.mark.parametrize("indexes", [(np.array([1, 96, 5, 9])), ([1, 96, 5, 9])]) +@pytest.mark.skip(reason="Index range checking disabled") +@pytest.mark.parametrize( + "indexes", + [ + (np.array([1, 96, 5, 9])), + ([1, 16, 5, 9], [1, 3]), + ((1, 16, 5, 9), (1, 3)), + ], +) def test_constants_array_indexing_bad(indexes): ds, _ = load_constants_fs() with pytest.raises(IndexError): diff --git a/tests/grib/test_grib_slice.py b/tests/grib/test_grib_slice.py index 1e3a42df..d4033f43 100644 --- a/tests/grib/test_grib_slice.py +++ b/tests/grib/test_grib_slice.py @@ -108,7 +108,11 @@ def test_grib_slice_multi_file(indexes, expected_meta): @pytest.mark.parametrize("array_backend", ARRAY_BACKENDS) @pytest.mark.parametrize( "indexes1,indexes2", - [(np.array([1, 16, 5, 9]), np.array([1, 3])), ([1, 16, 5, 9], [1, 3])], + [ + (np.array([1, 16, 5, 9]), np.array([1, 3])), + ([1, 16, 5, 9], [1, 3]), + ((1, 16, 5, 9), (1, 3)), + ], ) def test_grib_array_indexing(fl_type, array_backend, indexes1, indexes2): f = load_grib_data("tuv_pl.grib", fl_type, array_backend) @@ -122,9 +126,17 @@ def test_grib_array_indexing(fl_type, array_backend, indexes1, indexes2): assert r1.metadata("shortName") == ["u", "t"] +@pytest.mark.skip(reason="Index range checking disabled") @pytest.mark.parametrize("fl_type", FL_TYPES) @pytest.mark.parametrize("array_backend", ARRAY_BACKENDS) -@pytest.mark.parametrize("indexes", [(np.array([1, 19, 5, 9])), ([1, 19, 5, 9])]) +@pytest.mark.parametrize( + "indexes", + [ + (np.array([1, 19, 5, 9])), + ([1, 16, 5, 9], [1, 3]), + ((1, 16, 5, 9), (1, 3)), + ], +) def test_grib_array_indexing_bad(fl_type, array_backend, indexes): f = load_grib_data("tuv_pl.grib", fl_type, array_backend) with pytest.raises(IndexError): diff --git a/tests/netcdf/test_netcdf_slice.py b/tests/netcdf/test_netcdf_slice.py index 7ee5c86d..b077ca05 100644 --- a/tests/netcdf/test_netcdf_slice.py +++ b/tests/netcdf/test_netcdf_slice.py @@ -96,7 +96,11 @@ def test_netcdf_slice_multi_file(indexes, expected_meta): @pytest.mark.parametrize("mode", ["nc", "xr"]) @pytest.mark.parametrize( "indexes1,indexes2", - [(np.array([1, 16, 5, 9]), np.array([1, 3])), ([1, 16, 5, 9], [1, 3])], + [ + (np.array([1, 16, 5, 9]), np.array([1, 3])), + ([1, 16, 5, 9], [1, 3]), + ((1, 16, 5, 9), (1, 3)), + ], ) def test_netcdf_array_indexing(mode, indexes1, indexes2): f = load_nc_or_xr_source(earthkit_examples_file("tuv_pl.nc"), mode) @@ -110,8 +114,16 @@ def test_netcdf_array_indexing(mode, indexes1, indexes2): assert r1.metadata("variable") == ["v", "u"] +@pytest.mark.skip(reason="Index range checking disabled") @pytest.mark.parametrize("mode", ["nc", "xr"]) -@pytest.mark.parametrize("indexes", [(np.array([1, 19, 5, 9])), ([1, 19, 5, 9])]) +@pytest.mark.parametrize( + "indexes", + [ + (np.array([1, 19, 5, 9])), + ([1, 16, 5, 9], [1, 3]), + ((1, 16, 5, 9), (1, 3)), + ], +) def test_netcdf_array_indexing_bad(mode, indexes): f = load_nc_or_xr_source(earthkit_examples_file("tuv_pl.nc"), mode) with pytest.raises(IndexError): From baefc8e87af105558efa6e6ff8a92c9785bd3542 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Fri, 10 May 2024 17:50:58 +0100 Subject: [PATCH 04/22] Add suport for anemoi-datasets --- src/earthkit/data/core/metadata.py | 2 +- src/earthkit/data/indexing/cube.py | 3 +-- tests/constants/test_constants_metadata.py | 3 ++- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/earthkit/data/core/metadata.py b/src/earthkit/data/core/metadata.py index ce292f3e..07239d87 100644 --- a/src/earthkit/data/core/metadata.py +++ b/src/earthkit/data/core/metadata.py @@ -207,7 +207,7 @@ def _get_custom_key(self, key, default=None, raise_on_missing=True, **kwargs): if self._is_custom_key(key): try: if key == DATETIME: - return self._valid_datetime() + return self._valid_datetime().isoformat() elif key == GRIDSPEC: return self.grid_spec except Exception as e: diff --git a/src/earthkit/data/indexing/cube.py b/src/earthkit/data/indexing/cube.py index f895c2f5..873ea720 100644 --- a/src/earthkit/data/indexing/cube.py +++ b/src/earthkit/data/indexing/cube.py @@ -78,6 +78,7 @@ def __init__( # print(ds[2]) # print(ds[3]) self.source = ds.order_by(*args, remapping=remapping, patches=patches) + del ds # print("after") # print(self.source[0]) @@ -175,8 +176,6 @@ def __getitem__(self, indexes): assert isinstance(i, int), i dataset_indexes.append(i) - print("dataset_indexes", dataset_indexes, "tuple", tuple(dataset_indexes)) - ds = self.source[tuple(dataset_indexes)] # If we match just one element, we return it diff --git a/tests/constants/test_constants_metadata.py b/tests/constants/test_constants_metadata.py index 9cd2698b..0380b8eb 100644 --- a/tests/constants/test_constants_metadata.py +++ b/tests/constants/test_constants_metadata.py @@ -36,7 +36,8 @@ def test_constants_valid_datetime(): ds, _ = load_constants_fs(last_step=12) f = ds[4] - assert f.metadata("valid_datetime") == "2020-05-13T18:00:00" + assert f.metadata("valid_datetime") == datetime.datetime(2020, 5, 13, 18) + # "2020-05-13T18:00:00" if __name__ == "__main__": From 2d1617cabbe72a68a25df146bcafece49a1ffcd1 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Wed, 15 May 2024 10:16:17 +0100 Subject: [PATCH 05/22] Add suport for anemoi-datasets --- tests/core/test_metadata.py | 4 +--- tests/grib/test_grib_metadata.py | 2 +- tests/grib/test_grib_order_by.py | 21 ++++++++++----------- tests/grib/test_grib_sel.py | 3 +-- tests/netcdf/test_netcdf_metadata.py | 2 +- 5 files changed, 14 insertions(+), 18 deletions(-) diff --git a/tests/core/test_metadata.py b/tests/core/test_metadata.py index 4c296fca..fc397473 100644 --- a/tests/core/test_metadata.py +++ b/tests/core/test_metadata.py @@ -9,8 +9,6 @@ # nor does it submit to any jurisdiction. # -import datetime - import pytest from earthkit.data import from_source @@ -166,7 +164,7 @@ def test_grib_grib_metadata_valid_datetime(): ds = from_source("file", earthkit_test_data_file("t_time_series.grib")) md = ds[4].metadata() - assert md["valid_datetime"] == datetime.datetime(2020, 12, 21, 18) + assert md["valid_datetime"] == "2020-12-21T18:00:00" def test_grib_metadata_override(): diff --git a/tests/grib/test_grib_metadata.py b/tests/grib/test_grib_metadata.py index ced1e290..cdd676c6 100644 --- a/tests/grib/test_grib_metadata.py +++ b/tests/grib/test_grib_metadata.py @@ -532,7 +532,7 @@ def test_grib_valid_datetime(fl_type, array_backend): ds = load_grib_data("t_time_series.grib", fl_type, array_backend, folder="data") f = ds[4] - assert f.metadata("valid_datetime") == datetime.datetime(2020, 12, 21, 18) + assert f.metadata("valid_datetime") == "2020-12-21T18:00:00" @pytest.mark.parametrize("fl_type", ["file"]) diff --git a/tests/grib/test_grib_order_by.py b/tests/grib/test_grib_order_by.py index f515cf7e..87793476 100644 --- a/tests/grib/test_grib_order_by.py +++ b/tests/grib/test_grib_order_by.py @@ -9,7 +9,6 @@ # nor does it submit to any jurisdiction. # -import datetime import os import sys @@ -186,16 +185,16 @@ def test_grib_order_by_valid_datetime(fl_type, array_backend): assert len(g) == 10 ref = [ - datetime.datetime(2020, 12, 23, 12, 0), - datetime.datetime(2020, 12, 23, 12, 0), - datetime.datetime(2020, 12, 21, 21, 0), - datetime.datetime(2020, 12, 21, 21, 0), - datetime.datetime(2020, 12, 21, 18, 0), - datetime.datetime(2020, 12, 21, 18, 0), - datetime.datetime(2020, 12, 21, 15, 0), - datetime.datetime(2020, 12, 21, 15, 0), - datetime.datetime(2020, 12, 21, 12, 0), - datetime.datetime(2020, 12, 21, 12, 0), + "2020-12-23T12:00:00", + "2020-12-23T12:00:00", + "2020-12-21T21:00:00", + "2020-12-21T21:00:00", + "2020-12-21T18:00:00", + "2020-12-21T18:00:00", + "2020-12-21T15:00:00", + "2020-12-21T15:00:00", + "2020-12-21T12:00:00", + "2020-12-21T12:00:00", ] assert g.metadata("valid_datetime") == ref diff --git a/tests/grib/test_grib_sel.py b/tests/grib/test_grib_sel.py index 26a15a1a..b56e9060 100644 --- a/tests/grib/test_grib_sel.py +++ b/tests/grib/test_grib_sel.py @@ -9,7 +9,6 @@ # nor does it submit to any jurisdiction. # -import datetime import os import sys @@ -198,7 +197,7 @@ def test_grib_sel_date(fl_type, array_backend): def test_grib_sel_valid_datetime(fl_type, array_backend): f = load_grib_data("t_time_series.grib", fl_type, array_backend, folder="data") - g = f.sel(valid_datetime=datetime.datetime(2020, 12, 21, 21)) + g = f.sel(valid_datetime="2020-12-21T21:00:00") assert len(g) == 2 ref_keys = ["shortName", "date", "time", "step"] diff --git a/tests/netcdf/test_netcdf_metadata.py b/tests/netcdf/test_netcdf_metadata.py index e2a6b8be..8ec28656 100644 --- a/tests/netcdf/test_netcdf_metadata.py +++ b/tests/netcdf/test_netcdf_metadata.py @@ -77,7 +77,7 @@ def test_netcdf_datetime(): @pytest.mark.parametrize("mode", ["nc", "xr"]) def test_netcdf_valid_datetime(mode): ds = load_nc_or_xr_source(earthkit_examples_file("test.nc"), mode) - assert ds[0].metadata("valid_datetime") == datetime.datetime(2020, 5, 13, 12) + assert ds[0].metadata("valid_datetime") == "2020-05-13T12:00:00" if __name__ == "__main__": From 88f6113b185ea2f54661531ef6fbd522dd876262 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Wed, 15 May 2024 16:18:32 +0100 Subject: [PATCH 06/22] Add suport for anemoi-datasets --- src/earthkit/data/readers/netcdf/field.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/earthkit/data/readers/netcdf/field.py b/src/earthkit/data/readers/netcdf/field.py index ff17c041..ec23ae1e 100644 --- a/src/earthkit/data/readers/netcdf/field.py +++ b/src/earthkit/data/readers/netcdf/field.py @@ -107,7 +107,7 @@ def __init__(self, field): if time is not None: self.time = to_datetime(time) if "forecast_reference_time" in field._ds.data_vars: - forecast_reference_time = field.ds["forecast_reference_time"].data + forecast_reference_time = field._ds["forecast_reference_time"].data assert forecast_reference_time.ndim == 0, forecast_reference_time forecast_reference_time = forecast_reference_time.astype( "datetime64[s]" From a222790cd2f9540a4f95fb0ddf22975f44c6df19 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Wed, 15 May 2024 16:29:23 +0100 Subject: [PATCH 07/22] Add suport for anemoi-datasets --- src/earthkit/data/readers/netcdf/field.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/earthkit/data/readers/netcdf/field.py b/src/earthkit/data/readers/netcdf/field.py index ec23ae1e..78bbc9c0 100644 --- a/src/earthkit/data/readers/netcdf/field.py +++ b/src/earthkit/data/readers/netcdf/field.py @@ -9,6 +9,9 @@ import logging from datetime import timedelta +from functools import cached_property + +import numpy as np from earthkit.data.core.fieldlist import Field from earthkit.data.core.geography import Geography @@ -234,6 +237,25 @@ def _values(self, dtype=None): else: return self._to_numpy().astype(dtype, copy=False) + @cached_property + def grid_mapping(self): + def tidy(x): + if isinstance(x, np.ndarray): + return x.tolist() + if isinstance(x, (tuple, list)): + return [tidy(y) for y in x] + if isinstance(x, dict): + return {k: tidy(v) for k, v in x.items()} + return x + + # return tidy( + # self.owner.xr_dataset[ + # self.owner.xr_dataset[self.variable].grid_mapping + # ].attrs + # ) + + return tidy(self._ds[self._ds[self.variable].grid_mapping].attrs) + class NetCDFMetadata(XArrayMetadata): pass From 8f8f9eb80f4070dee174f9c4beffb54dc1093c51 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Thu, 16 May 2024 16:31:22 +0100 Subject: [PATCH 08/22] Add suport for anemoi-datasets --- src/earthkit/data/core/fieldlist.py | 8 ++++++++ src/earthkit/data/core/geography.py | 8 ++++++++ src/earthkit/data/readers/grib/metadata.py | 16 ++++++++++++++++ src/earthkit/data/readers/netcdf/field.py | 10 ++++++++++ src/earthkit/data/sources/ecmwf_api.py | 1 + tests/netcdf/test_netcdf_sel.py | 8 +++----- tests/netcdf/test_netcdf_summary.py | 14 ++++++-------- 7 files changed, 52 insertions(+), 13 deletions(-) diff --git a/src/earthkit/data/core/fieldlist.py b/src/earthkit/data/core/fieldlist.py index 3580fd47..fb982455 100644 --- a/src/earthkit/data/core/fieldlist.py +++ b/src/earthkit/data/core/fieldlist.py @@ -349,6 +349,14 @@ def grid_points(self): def resolution(self): return self._metadata.geography.resolution() + @property + def mars_grid(self): + return self._metadata.geography.mars_grid() + + @property + def mars_area(self): + return self._metadata.geography.mars_area() + @property def shape(self): r"""tuple: Get the shape of the field. diff --git a/src/earthkit/data/core/geography.py b/src/earthkit/data/core/geography.py index 0df0b03b..ca55bda7 100644 --- a/src/earthkit/data/core/geography.py +++ b/src/earthkit/data/core/geography.py @@ -131,3 +131,11 @@ def gridspec(self): @abstractmethod def resolution(self): pass + + @abstractmethod + def mars_grid(self): + pass + + @abstractmethod + def mars_area(self): + pass diff --git a/src/earthkit/data/readers/grib/metadata.py b/src/earthkit/data/readers/grib/metadata.py index d12de256..5e1deaf9 100644 --- a/src/earthkit/data/readers/grib/metadata.py +++ b/src/earthkit/data/readers/grib/metadata.py @@ -158,6 +158,22 @@ def resolution(self): raise ValueError(f"Unknown gridType={grid_type}") + def mars_grid(self): + if len(self.shape) == 2: + return [ + self.metadata.get("iDirectionIncrementInDegrees"), + self.metadata.get("jDirectionIncrementInDegrees"), + ] + + return self.handle.get("gridName") + + def mars_area(self): + north = self.metadata.get("latitudeOfFirstGridPointInDegrees") + south = self.metadata.get("latitudeOfLastGridPointInDegrees") + west = self.metadata.get("longitudeOfFirstGridPointInDegrees") + east = self.metadata.get("longitudeOfLastGridPointInDegrees") + return [north, west, south, east] + class GribMetadata(Metadata): """Represent the metadata of a GRIB field. diff --git a/src/earthkit/data/readers/netcdf/field.py b/src/earthkit/data/readers/netcdf/field.py index 78bbc9c0..c2aec69a 100644 --- a/src/earthkit/data/readers/netcdf/field.py +++ b/src/earthkit/data/readers/netcdf/field.py @@ -75,6 +75,16 @@ def resolution(self): # TODO: implement resolution return None + @property + def mars_grid(self): + # TODO: implement mars_grid + return None + + @property + def mars_area(self): + # TODO: code me + return [self.north, self.west, self.south, self.east] + class XArrayMetadata(RawMetadata): LS_KEYS = ["variable", "level", "valid_datetime", "units"] diff --git a/src/earthkit/data/sources/ecmwf_api.py b/src/earthkit/data/sources/ecmwf_api.py index 569a932f..83528256 100644 --- a/src/earthkit/data/sources/ecmwf_api.py +++ b/src/earthkit/data/sources/ecmwf_api.py @@ -85,6 +85,7 @@ def retrieve(target, request): @normalize("date", "date-list(%Y-%m-%d)") @normalize("area", "bounding-box(list)") def requests(self, **kwargs): + kwargs.pop("accumulation_period", None) split_on = kwargs.pop("split_on", None) if split_on is None or not isinstance(kwargs.get(split_on), (list, tuple)): return [kwargs] diff --git a/tests/netcdf/test_netcdf_sel.py b/tests/netcdf/test_netcdf_sel.py index d6f7f811..92567a94 100644 --- a/tests/netcdf/test_netcdf_sel.py +++ b/tests/netcdf/test_netcdf_sel.py @@ -9,8 +9,6 @@ # nor does it submit to any jurisdiction. # -import datetime - import pytest from earthkit.data.testing import earthkit_examples_file, load_nc_or_xr_source @@ -37,11 +35,11 @@ dict( variable=["t"], level=[500, 700], - valid_datetime=datetime.datetime(2018, 8, 1, 12, 0), + valid_datetime="2018-08-01T12:00:00", ), [ - ["t", 700, datetime.datetime(2018, 8, 1, 12, 0)], - ["t", 500, datetime.datetime(2018, 8, 1, 12, 0)], + ["t", 700, "2018-08-01T12:00:00"], + ["t", 500, "2018-08-01T12:00:00"], ], ["variable", "level", "valid_datetime"], ), diff --git a/tests/netcdf/test_netcdf_summary.py b/tests/netcdf/test_netcdf_summary.py index c7d5cd54..567d8069 100644 --- a/tests/netcdf/test_netcdf_summary.py +++ b/tests/netcdf/test_netcdf_summary.py @@ -8,8 +8,6 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. -import datetime - import pytest from earthkit.data.testing import earthkit_examples_file, load_nc_or_xr_source @@ -27,10 +25,10 @@ def test_netcdf_ls(mode): "variable": {0: "t", 1: "t", 2: "t", 3: "t"}, "level": {0: 1000, 1: 850, 2: 700, 3: 500}, "valid_datetime": { - 0: datetime.datetime.fromisoformat("2018-08-01 12:00:00"), - 1: datetime.datetime.fromisoformat("2018-08-01 12:00:00"), - 2: datetime.datetime.fromisoformat("2018-08-01 12:00:00"), - 3: datetime.datetime.fromisoformat("2018-08-01 12:00:00"), + 0: "2018-08-01T12:00:00", + 1: "2018-08-01T12:00:00", + 2: "2018-08-01T12:00:00", + 3: "2018-08-01T12:00:00", }, "units": {0: "K", 1: "K", 2: "K", 3: "K"}, } @@ -45,8 +43,8 @@ def test_netcdf_ls(mode): "variable": {0: "t", 1: "t"}, "level": {0: 1000, 1: 850}, "valid_datetime": { - 0: datetime.datetime.fromisoformat("2018-08-01 12:00:00"), - 1: datetime.datetime.fromisoformat("2018-08-01 12:00:00"), + 0: "2018-08-01T12:00:00", + 1: "2018-08-01T12:00:00", }, "units": {0: "K", 1: "K"}, "long_name": {0: "Temperature", 1: "Temperature"}, From 9b21e5c1994665b06d42d913adb9251f207f5666 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Thu, 16 May 2024 16:35:18 +0100 Subject: [PATCH 09/22] Add suport for anemoi-datasets --- src/earthkit/data/readers/grib/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/earthkit/data/readers/grib/metadata.py b/src/earthkit/data/readers/grib/metadata.py index 5e1deaf9..d92c9ab2 100644 --- a/src/earthkit/data/readers/grib/metadata.py +++ b/src/earthkit/data/readers/grib/metadata.py @@ -159,7 +159,7 @@ def resolution(self): raise ValueError(f"Unknown gridType={grid_type}") def mars_grid(self): - if len(self.shape) == 2: + if len(self.shape()) == 2: return [ self.metadata.get("iDirectionIncrementInDegrees"), self.metadata.get("jDirectionIncrementInDegrees"), From c94f37d3da8b62ad5051835575582c86452a0d9a Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Thu, 16 May 2024 16:40:16 +0100 Subject: [PATCH 10/22] Add suport for anemoi-datasets --- src/earthkit/data/readers/grib/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/earthkit/data/readers/grib/metadata.py b/src/earthkit/data/readers/grib/metadata.py index d92c9ab2..295fba1a 100644 --- a/src/earthkit/data/readers/grib/metadata.py +++ b/src/earthkit/data/readers/grib/metadata.py @@ -165,7 +165,7 @@ def mars_grid(self): self.metadata.get("jDirectionIncrementInDegrees"), ] - return self.handle.get("gridName") + return self.metadata.get("gridName") def mars_area(self): north = self.metadata.get("latitudeOfFirstGridPointInDegrees") From afb5e7989acdfd0c6afd5cbf88090822aac4969c Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Mon, 20 May 2024 18:48:30 +0100 Subject: [PATCH 11/22] Add suport for anemoi-datasets --- src/earthkit/data/core/fieldlist.py | 5 + src/earthkit/data/readers/grib/metadata.py | 52 ++++++- src/earthkit/data/utils/message.py | 18 +++ src/earthkit/data/wrappers/string.py | 9 +- tests/data/rotated_N32_subarea.grib | Bin 0 -> 600 bytes tests/data/rotated_wind_20x20.grib | Bin 0 -> 884 bytes tests/data/wind_20x20.grib | Bin 0 -> 864 bytes tests/grib/test_grib_geography.py | 154 ++++++++++++++++++++- 8 files changed, 232 insertions(+), 6 deletions(-) create mode 100644 tests/data/rotated_N32_subarea.grib create mode 100644 tests/data/rotated_wind_20x20.grib create mode 100644 tests/data/wind_20x20.grib diff --git a/src/earthkit/data/core/fieldlist.py b/src/earthkit/data/core/fieldlist.py index fb982455..f9ac550c 100644 --- a/src/earthkit/data/core/fieldlist.py +++ b/src/earthkit/data/core/fieldlist.py @@ -345,6 +345,11 @@ def grid_points(self): r = self.to_latlon(flatten=True) return r["lat"], r["lon"] + def grid_points_unrotated(self): + lat = self._metadata.geography.latitudes_unrotated() + lon = self._metadata.geography.longitudes_unrotated() + return lat, lon + @property def resolution(self): return self._metadata.geography.resolution() diff --git a/src/earthkit/data/readers/grib/metadata.py b/src/earthkit/data/readers/grib/metadata.py index 295fba1a..be9a30ba 100644 --- a/src/earthkit/data/readers/grib/metadata.py +++ b/src/earthkit/data/readers/grib/metadata.py @@ -8,6 +8,8 @@ # import datetime +import warnings +from functools import cached_property from earthkit.data.core.geography import Geography from earthkit.data.core.metadata import Metadata @@ -174,6 +176,55 @@ def mars_area(self): east = self.metadata.get("longitudeOfLastGridPointInDegrees") return [north, west, south, east] + @property + def rotation(self): + return ( + self.metadata.get("latitudeOfSouthernPoleInDegrees"), + self.metadata.get("longitudeOfSouthernPoleInDegrees"), + self.metadata.get("angleOfRotationInDegrees"), + ) + + @cached_property + def rotated(self): + grid_type = self.metadata.get("gridType") + return "rotated" in grid_type + + @cached_property + def rotated_iterator(self): + return self.metadata.get("iteratorDisableUnrotate") is not None + + def latitudes_unrotated(self, **kwargs): + if not self.rotated: + return self.latitudes(**kwargs) + if not self.rotated_iterator: + from earthkit.geo.rotate import rotate + + grid_type = self.metadata.get("gridType") + warnings.warn(f"ecCodes does not support rotated iterator for {grid_type}") + lat, lon = self.latitudes(**kwargs), self.longitudes(**kwargs) + south_pole_lat, south_pole_lon, _ = self.rotation + lat, lon = rotate(lat, lon, south_pole_lat, south_pole_lon) + return lat + + with self.metadata._handle._set_tmp("iteratorDisableUnrotate", 1, 0): + return self.latitudes(**kwargs) + + def longitudes_unrotated(self, **kwargs): + if not self.rotated: + return self.longitudes(**kwargs) + if not self.rotated_iterator: + from earthkit.geo.rotate import rotate + + grid_type = self.metadata.get("gridType") + warnings.warn(f"ecCodes does not support rotated iterator for {grid_type}") + lat, lon = self.latitudes(**kwargs), self.longitudes(**kwargs) + south_pole_lat, south_pole_lon, _ = self.rotation + lat, lon = rotate(lat, lon, south_pole_lat, south_pole_lon) + return lon + + with self.metadata._handle._set_tmp("iteratorDisableUnrotate", 1, 0): + return self.longitudes(**kwargs) + class GribMetadata(Metadata): """Represent the metadata of a GRIB field. @@ -183,7 +234,6 @@ class GribMetadata(Metadata): handle: :obj:`GribCodesHandle` Object representing the ecCodes GRIB handle of the field. - :obj:`GribMetadata` is created internally by a :obj:`GribField` and we can use the field's :meth:`metadata` method to access it. diff --git a/src/earthkit/data/utils/message.py b/src/earthkit/data/utils/message.py index 7499f90b..7d9ada11 100644 --- a/src/earthkit/data/utils/message.py +++ b/src/earthkit/data/utils/message.py @@ -13,6 +13,7 @@ import os import threading import time +from contextlib import contextmanager import eccodes import numpy as np @@ -269,6 +270,23 @@ def set(self, name, value): LOG.exception(e) raise + @contextmanager + def _set_tmp(self, name, new_value, restore_value): + try: + if isinstance(new_value, list): + yield eccodes.codes_set_array(self._handle, name, new_value) + else: + yield eccodes.codes_set(self._handle, name, new_value) + except Exception as e: + LOG.error("Error setting %s=%s", name, new_value) + LOG.exception(e) + raise + finally: + if isinstance(restore_value, list): + eccodes.codes_set_array(self._handle, name, restore_value) + else: + eccodes.codes_set(self._handle, name, restore_value) + def write(self, f): eccodes.codes_write(self._handle, f) diff --git a/src/earthkit/data/wrappers/string.py b/src/earthkit/data/wrappers/string.py index 41949fc2..b64afcbb 100644 --- a/src/earthkit/data/wrappers/string.py +++ b/src/earthkit/data/wrappers/string.py @@ -37,10 +37,11 @@ class StrWrapper(Wrapper): def __init__(self, data): self.data = data - # DEPENDANCIES NOT YET INSTALLED - # def bounding_box(self): - # from earthkit.data.utils.domains import domain_to_area - # return domain_to_area(self.data) + def to_bounding_box(self): + if "/" in self.data: + return tuple(float(x) for x in self.data.split("/")) + else: + raise ValueError(f"Invalid bounding box '{self.data}'") def datetime(self): return parse_date(self.data) diff --git a/tests/data/rotated_N32_subarea.grib b/tests/data/rotated_N32_subarea.grib new file mode 100644 index 0000000000000000000000000000000000000000..1d175de7224b8052e2e0636fa3dae9d5b0526ab1 GIT binary patch literal 600 zcmZ<{@^oTg3S(qoFlk7d`M+T~kR`?lB!CD6L?INMWMpJwVKgu>Fa(QuFlh7r|IfhB zklqjCHyH9U%sdYiPyov|gnnq4;{ahYG~j`Z=QtX;oSuJvE?~lJI>{!$dbLBoTXd)x zpPI`#`@fmNsm3MSl2XE7nKxP~*?GA)`ZZVW@b<7*i{h(n$~#x8+rZS=m9f(4kM%6a zoP;|`FPb-3SEb)BVJUxK9#HwAo~@>a|L(jLRt#m}Ea|+0q$R zZS}u9Qx=p@_nLTV27BuPVO^Cu;Z2zgQ@a;*E_;}7-zwCze2(~n3yYfOiB8?l%fS61 z;!eKb!c(jH*9a6#RZW>ZV{!lLyDLSPFIg~O=z&C+gL>o4RbdGbn=#^Qvceli* z-8-)BZClUGz+iAZH+M?nnvd&`>{&HKptXOS*q+jZ`X|pGxV}wE!?6-+H3p)W$OtP9MFddbH?N_(sWP z2i7s~?LPMIq|=#i7tUPlKK=P(+u5#j-*?wm*iAWMRkkB0$BgMcW6f|HDlOe~BB1_p*;5iN%QTns`C zoQxI?Kz>J~MFXpWf*%lqWgGky8s<2FnT!D(4crHhf7BERiit?iO^AEp%@q}LeC@5yzsRa%-WuHjwGp7^a|Pu!){p4@!n z1nKLdkTO-59|T6%4mPUv!jwUI}Y>)b1XZx<|Se2~wnyji!!=4;&4$qES_#i9j5 z1*dY<<5w}51A#kQ%q^p3%-xtKC}3(9%b6w-?jC6!{@;J2ziyOk#J2+O36o}6t_oax zb%}n>Q*ZCM&h`Tn7G>W_I_iDB@K=&WMqhXL;%n)K6Dk(y?p}G!QKvBOb!2gCc*FGO zo%gy^yL+#DXOwL2 z%Y`ytXWQ>+yecnG;`iLpaF^q1^C9s@x%XvHd=9r>NsF(zSh=L2MYA?~XQ4xT<{X#Z z*?pb)7Rk*rllwm*iAWMRkkB0$BgMcW6f|HDlOe~BB1_p*;5e0_-3=BdH zoQxI?Kz>7`MFXpWf*%lqWf>R)I2yPQZtn^Z$ng(R$TCcdjlLOfR=losQfggrbA@S_ zfAp*9tzNAq>~l91Pq30Js*Xu5t;*O^IU`Xn=3bJ2uvAiL=#qlo09nhvk;Za71fcI+qn_8b%67uH%+*A~BZdY}ABXNHw9 z{}q)G_f6cMUxy1_qHyS{d&9Y0dfyL)`q{ zos&+L^v^yq^K|t5^j{Og%3V}GSbp8fe~4RWn@`!qy<3{E+}rePvD(%tK871!t(vns zZAN>2Q_ZcDpX7BzceC6v-{*YN=C5q6fmPX?a)ZoYX_KlQoRiy;& zF_;5^J6e1#qjr389TLz!E@0W`BOS_St!m?7ALJlq^2gN7OT;PLZdyot+H?<_m_}PM z2klhP(Cbz;7W^jX13vnMTI3t^H#q2Two&o863(K$Ro~xzq1Jp?V~1DP8%=6F16&Q2 zPuS^YlgT<-T56j#`yaD)(BXHJ z^Ypf{x3o~ojCk(Q86{>RWy@;sY`@87kBz>wn(JogBAYz#Hdk>6c}MpU1K-DPAqfwC z^&OW59Q?q%$1F1qGM&~~nQ8Kc7=Px# literal 0 HcmV?d00001 diff --git a/tests/grib/test_grib_geography.py b/tests/grib/test_grib_geography.py index cbb81af1..9a9de8ce 100644 --- a/tests/grib/test_grib_geography.py +++ b/tests/grib/test_grib_geography.py @@ -15,7 +15,13 @@ import numpy as np import pytest -from earthkit.data.testing import ARRAY_BACKENDS, check_array_type +import earthkit.data +from earthkit.data.testing import ( + ARRAY_BACKENDS, + check_array_type, + earthkit_examples_file, + earthkit_test_data_file, +) from earthkit.data.utils import projections here = os.path.dirname(__file__) @@ -220,6 +226,152 @@ def test_grib_projection_mercator(fl_type, array_backend): assert projection.globe == dict() +@pytest.mark.parametrize( + "path,expected_value", + [ + (earthkit_examples_file("test.grib"), 4.0), + (earthkit_test_data_file("rgg_small_subarea_cellarea_ref.grib"), "O1280"), + (earthkit_test_data_file("rotated_N32_subarea.grib"), "N32"), + ], +) +def test_grib_resolution(path, expected_value): + ds = earthkit.data.from_source("file", path) + + if isinstance(expected_value, str): + assert ds[0].resolution == expected_value + else: + assert np.isclose(ds[0].resolution, expected_value) + + +@pytest.mark.parametrize( + "path,expected_value", + [ + (earthkit_examples_file("test.grib"), [73.0, -27.0, 33.0, 45.0]), + ( + earthkit_test_data_file("rgg_small_subarea_cellarea_ref.grib"), + [89.877, 36.233, 84.815, 46.185], + ), + ( + earthkit_test_data_file("rotated_N32_subarea.grib"), + [26.511, 0.0, -12.558, 39.375], + ), + ( + earthkit_test_data_file("rotated_wind_20x20.grib"), + [80.0, 0.0, -80.0, 340.0], + ), + ( + earthkit_test_data_file("mercator.grib"), + [16.9775, 291.9722, 19.5221, 296.0156], + ), + ], +) +def test_grib_mars_area(path, expected_value): + ds = earthkit.data.from_source("file", path) + + assert np.allclose(np.asarray(ds[0].mars_area), np.asarray(expected_value)) + + +@pytest.mark.parametrize( + "path,expected_value", + [ + (earthkit_examples_file("test.grib"), [4.0, 4.0]), + ( + earthkit_test_data_file("rgg_small_subarea_cellarea_ref.grib"), + "O1280", + ), + ( + earthkit_test_data_file("rotated_N32_subarea.grib"), + "N32", + ), + ( + earthkit_test_data_file("rotated_wind_20x20.grib"), + [20.0, 20.0], + ), + ( + earthkit_test_data_file("mercator.grib"), + [None, None], + ), + ], +) +def test_grib_mars_grid(path, expected_value): + ds = earthkit.data.from_source("file", path) + + if isinstance(expected_value, str): + assert ds[0].mars_grid == expected_value + elif expected_value == [None, None]: + assert ds[0].mars_grid == expected_value + else: + assert np.allclose(np.asarray(ds[0].mars_grid), np.asarray(expected_value)) + + +def test_grib_grid_points_rotated_ll(): + """The""" + ds = earthkit.data.from_source( + "file", earthkit_test_data_file("rotated_wind_20x20.grib") + ) + + # grid points + res = ds[0].grid_points() + ref1 = np.array([30.0, 29.351052, 27.504876, 24.734374]), np.array( + [140.0, 136.09296, 132.770576, 130.469424] + ) + + ref2 = np.array([-17.968188, -14.787578, -12.22927, -10.573044]), np.array( + [-50.356844, -48.94784, -46.558096, -43.46374] + ) + + assert np.allclose(res[0][:4], ref1[0]) + assert np.allclose(res[1][:4], ref1[1]) + assert np.allclose(res[0][-4:], ref2[0]) + assert np.allclose(res[1][-4:], ref2[1]) + + # unrotated grid points + ds1 = earthkit.data.from_source("file", earthkit_test_data_file("wind_20x20.grib")) + + res = ds[0].grid_points_unrotated() + ref = ds1[0].grid_points() + + assert np.allclose(res[0], ref[0]) + assert np.allclose(res[1], ref[1]) + + +def test_grib_grid_points_rotated_rgg(): + ds = earthkit.data.from_source( + "file", earthkit_test_data_file("rotated_N32_subarea.grib") + ) + + # grid points + res = ds[0].grid_points() + ref1 = np.array([85.489232, 84.81188, 83.171928, 81.086144]), np.array( + [140.0, 110.950144, 92.460416, 82.07156] + ) + + ref2 = np.array([44.011184, 42.14694, 40.199948, 38.1796]), np.array( + [4.244462, 7.003924, 9.575494, 11.973933] + ) + + assert np.allclose(res[0][:4], ref1[0]) + assert np.allclose(res[1][:4], ref1[1]) + assert np.allclose(res[0][-4:], ref2[0]) + assert np.allclose(res[1][-4:], ref2[1]) + + # unrotated grid points + res = ds[0].grid_points_unrotated() + + ref1 = np.array([25.42352666, 23.76898256, 22.12830112, 20.50355176]), np.array( + [43.20872165, 45.29450861, 47.36706898, 49.43030989] + ) + + ref2 = np.array([-23.87417203, -25.52690401, -27.162813, -28.77850622]), np.array( + [43.3371028, 45.74997023, 48.2101817, 50.72330737] + ) + + assert np.allclose(res[0][:4], ref1[0]) + assert np.allclose(res[1][:4], ref1[1]) + assert np.allclose(res[0][-4:], ref2[0]) + assert np.allclose(res[1][-4:], ref2[1]) + + if __name__ == "__main__": from earthkit.data.testing import main From feed6528c766877fcfae09972125085b8d3c091b Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Mon, 20 May 2024 18:50:57 +0100 Subject: [PATCH 12/22] Add suport for anemoi-datasets --- src/earthkit/data/wrappers/string.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/earthkit/data/wrappers/string.py b/src/earthkit/data/wrappers/string.py index b64afcbb..e1cf41f4 100644 --- a/src/earthkit/data/wrappers/string.py +++ b/src/earthkit/data/wrappers/string.py @@ -37,7 +37,7 @@ class StrWrapper(Wrapper): def __init__(self, data): self.data = data - def to_bounding_box(self): + def bounding_box(self): if "/" in self.data: return tuple(float(x) for x in self.data.split("/")) else: From ee2122303f77c9e4cda6867322726ea688a6a3a1 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Mon, 20 May 2024 18:54:04 +0100 Subject: [PATCH 13/22] Add suport for anemoi-datasets --- src/earthkit/data/core/fieldlist.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/earthkit/data/core/fieldlist.py b/src/earthkit/data/core/fieldlist.py index f9ac550c..b8e4c024 100644 --- a/src/earthkit/data/core/fieldlist.py +++ b/src/earthkit/data/core/fieldlist.py @@ -350,6 +350,10 @@ def grid_points_unrotated(self): lon = self._metadata.geography.longitudes_unrotated() return lat, lon + @property + def rotation(self): + return self._metadata.geography.rotation + @property def resolution(self): return self._metadata.geography.resolution() From 40dca0df573a186c0018f9ac50ee3b7f9c81d630 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Tue, 21 May 2024 11:29:40 +0100 Subject: [PATCH 14/22] Add suport for anemoi-datasets --- src/earthkit/data/readers/grib/metadata.py | 8 ++++---- tests/grib/test_grib_geography.py | 13 +++++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/earthkit/data/readers/grib/metadata.py b/src/earthkit/data/readers/grib/metadata.py index be9a30ba..b68d6341 100644 --- a/src/earthkit/data/readers/grib/metadata.py +++ b/src/earthkit/data/readers/grib/metadata.py @@ -197,13 +197,13 @@ def latitudes_unrotated(self, **kwargs): if not self.rotated: return self.latitudes(**kwargs) if not self.rotated_iterator: - from earthkit.geo.rotate import rotate + from earthkit.geo.rotate import unrotate grid_type = self.metadata.get("gridType") warnings.warn(f"ecCodes does not support rotated iterator for {grid_type}") lat, lon = self.latitudes(**kwargs), self.longitudes(**kwargs) south_pole_lat, south_pole_lon, _ = self.rotation - lat, lon = rotate(lat, lon, south_pole_lat, south_pole_lon) + lat, lon = unrotate(lat, lon, south_pole_lat, south_pole_lon) return lat with self.metadata._handle._set_tmp("iteratorDisableUnrotate", 1, 0): @@ -213,13 +213,13 @@ def longitudes_unrotated(self, **kwargs): if not self.rotated: return self.longitudes(**kwargs) if not self.rotated_iterator: - from earthkit.geo.rotate import rotate + from earthkit.geo.rotate import unrotate grid_type = self.metadata.get("gridType") warnings.warn(f"ecCodes does not support rotated iterator for {grid_type}") lat, lon = self.latitudes(**kwargs), self.longitudes(**kwargs) south_pole_lat, south_pole_lon, _ = self.rotation - lat, lon = rotate(lat, lon, south_pole_lat, south_pole_lon) + lat, lon = unrotate(lat, lon, south_pole_lat, south_pole_lon) return lon with self.metadata._handle._set_tmp("iteratorDisableUnrotate", 1, 0): diff --git a/tests/grib/test_grib_geography.py b/tests/grib/test_grib_geography.py index 9a9de8ce..43ac48ac 100644 --- a/tests/grib/test_grib_geography.py +++ b/tests/grib/test_grib_geography.py @@ -342,10 +342,13 @@ def test_grib_grid_points_rotated_rgg(): # grid points res = ds[0].grid_points() + + # front ref1 = np.array([85.489232, 84.81188, 83.171928, 81.086144]), np.array( [140.0, 110.950144, 92.460416, 82.07156] ) + # back ref2 = np.array([44.011184, 42.14694, 40.199948, 38.1796]), np.array( [4.244462, 7.003924, 9.575494, 11.973933] ) @@ -358,12 +361,14 @@ def test_grib_grid_points_rotated_rgg(): # unrotated grid points res = ds[0].grid_points_unrotated() - ref1 = np.array([25.42352666, 23.76898256, 22.12830112, 20.50355176]), np.array( - [43.20872165, 45.29450861, 47.36706898, 49.43030989] + # front + ref1 = np.array([26.510768, 26.51076943, 26.5107701, 26.51076846]), np.array( + [1.28492181e-15, 2.81250046e00, 5.62500163e00, 8.43749805e00] ) - ref2 = np.array([-23.87417203, -25.52690401, -27.162813, -28.77850622]), np.array( - [43.3371028, 45.74997023, 48.2101817, 50.72330737] + # back + ref2 = np.array([-12.55775535, -12.55775697, -12.55775699, -12.5577565]), np.array( + [30.93749931, 33.75000128, 36.56250084, 39.37500023] ) assert np.allclose(res[0][:4], ref1[0]) From 2b49ff714a92e7f27c9ec717260e500e430d6999 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Wed, 22 May 2024 16:18:07 +0100 Subject: [PATCH 15/22] Add test for remapping --- src/earthkit/data/core/index.py | 8 +++++--- tests/grib/test_grib_order_by.py | 15 +++++++++++++++ tests/grib/test_grib_sel.py | 20 ++++++++++++++++++++ 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/src/earthkit/data/core/index.py b/src/earthkit/data/core/index.py index cbf3beb1..5b97488a 100644 --- a/src/earthkit/data/core/index.py +++ b/src/earthkit/data/core/index.py @@ -277,13 +277,15 @@ def sel(self, *args, remapping=None, **kwargs): Using ``remapping`` to specify the selection by a key created from two other keys (we created key "param_level" from "param" and "levelist"): - >>> for f in ds.order_by( + >>> subset = ds.sel( ... param_level=["t850", "u1000"], ... remapping={"param_level": "{param}{levelist}"}, - ... ): + ... ) + >>> for f in subset: ... print(f) - GribField(t,850,20180801,1200,0,0) + ... GribField(u,1000,20180801,1200,0,0) + GribField(t,850,20180801,1200,0,0) """ kwargs = normalize_selection(*args, **kwargs) kwargs = self._normalize_kwargs_names(**kwargs) diff --git a/tests/grib/test_grib_order_by.py b/tests/grib/test_grib_order_by.py index 87793476..48a3b227 100644 --- a/tests/grib/test_grib_order_by.py +++ b/tests/grib/test_grib_order_by.py @@ -198,3 +198,18 @@ def test_grib_order_by_valid_datetime(fl_type, array_backend): ] assert g.metadata("valid_datetime") == ref + + +@pytest.mark.parametrize("fl_type", FL_TYPES) +@pytest.mark.parametrize("array_backend", ARRAY_BACKENDS) +def test_grib_order_by_remapping(fl_type, array_backend): + ds = load_grib_data("test6.grib", fl_type, array_backend) + + ordering = ["t850", "t1000", "u1000", "v850", "v1000", "u850"] + ref = [("t", 850), ("t", 1000), ("u", 1000), ("v", 850), ("v", 1000), ("u", 850)] + + r = ds.order_by( + param_level=ordering, remapping={"param_level": "{param}{levelist}"} + ) + + assert r.metadata("param", "level") == ref diff --git a/tests/grib/test_grib_sel.py b/tests/grib/test_grib_sel.py index b56e9060..3aa88ba6 100644 --- a/tests/grib/test_grib_sel.py +++ b/tests/grib/test_grib_sel.py @@ -340,6 +340,26 @@ def test_grib_isel_slice_multi_file(fl_type, array_backend): ] +@pytest.mark.parametrize("fl_type", FL_TYPES) +@pytest.mark.parametrize("array_backend", ARRAY_BACKENDS) +def test_grib_sel_remapping_1(fl_type, array_backend): + ds = load_grib_data("test6.grib", fl_type, array_backend) + ref = [("t", 850)] + r = ds.sel(param_level="t850", remapping={"param_level": "{param}{levelist}"}) + assert r.metadata("param", "level") == ref + + +@pytest.mark.parametrize("fl_type", FL_TYPES) +@pytest.mark.parametrize("array_backend", ARRAY_BACKENDS) +def test_grib_sel_remapping_2(fl_type, array_backend): + ds = load_grib_data("test6.grib", fl_type, array_backend) + ref = [("u", 1000), ("t", 850)] + r = ds.sel( + param_level=["t850", "u1000"], remapping={"param_level": "{param}{levelist}"} + ) + assert r.metadata("param", "level") == ref + + if __name__ == "__main__": from earthkit.data.testing import main From ce06623d39b953001500c15ddc87e1ea1fa8e7e6 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Thu, 23 May 2024 14:36:37 +0100 Subject: [PATCH 16/22] Fix netcdf field memory usage --- src/earthkit/data/readers/netcdf/field.py | 36 ++++++++++++------- src/earthkit/data/readers/netcdf/fieldlist.py | 3 +- tests/grib/test_grib_cube.py | 28 +++++++++++++++ 3 files changed, 53 insertions(+), 14 deletions(-) create mode 100644 tests/grib/test_grib_cube.py diff --git a/src/earthkit/data/readers/netcdf/field.py b/src/earthkit/data/readers/netcdf/field.py index c2aec69a..7789a888 100644 --- a/src/earthkit/data/readers/netcdf/field.py +++ b/src/earthkit/data/readers/netcdf/field.py @@ -26,9 +26,9 @@ class XArrayFieldGeography(Geography): - def __init__(self, metadata, data_array, ds, variable): + def __init__(self, metadata, ds, variable): self.metadata = metadata - self.data_array = data_array + self.variable = variable self.ds = ds self.north, self.west, self.south, self.east = self.ds.bbox(variable) @@ -59,9 +59,11 @@ def bounding_box(self): north=self.north, south=self.south, east=self.east, west=self.west ) + @cached_property def _grid_mapping(self): - if "grid_mapping" in self.data_array.attrs: - grid_mapping = self.ds[self.data_array.attrs["grid_mapping"]] + da = self.data_array + if "grid_mapping" in da.attrs: + grid_mapping = self.ds[da.attrs["grid_mapping"]] else: raise AttributeError( "no CF-compliant 'grid_mapping' detected in netCDF attributes" @@ -71,6 +73,10 @@ def _grid_mapping(self): def gridspec(self): raise NotImplementedError("gridspec is not implemented for netcdf/xarray") + @property + def data_array(self): + return self.ds[self.variable] + def resolution(self): # TODO: implement resolution return None @@ -102,7 +108,8 @@ def __init__(self, field): self._field = field self._geo = None - d = dict(self._field._da.attrs) + data_array = field._ds[field.variable] + d = dict(data_array.attrs) time = field.non_dim_coords.get("valid_time", field.non_dim_coords.get("time")) level = None @@ -151,7 +158,7 @@ def override(self, *args, **kwargs): def geography(self): if self._geo is None: self._geo = XArrayFieldGeography( - self, self._field._da, self._field._ds, self._field.variable + self, self._field._ds, self._field.variable ) return self._geo @@ -207,10 +214,6 @@ class XArrayField(Field): def __init__(self, ds, variable, slices, non_dim_coords, array_backend): super().__init__(array_backend) self._ds = ds - self._da = ds[variable] - - # self.north, self.west, self.south, self.east = ds.bbox(variable) - self.variable = variable self.slices = slices self.non_dim_coords = non_dim_coords @@ -227,19 +230,26 @@ def _make_metadata(self): return XArrayMetadata(self) def to_xarray(self): - dims = self._da.dims + da = self._ds[self.variable] + dims = da.dims v = {} for s in self.slices: if s.is_dimension: if s.name in dims: v[s.name] = s.index - return self._da.isel(**v) + return da.isel(**v) def to_pandas(self): return self.to_xarray().to_pandas() + # def _to_numpy(self): + # return self.to_xarray().to_numpy() + def _to_numpy(self): - return self.to_xarray().to_numpy() + dimensions = dict((s.name, s.index) for s in self.slices) + # values = self.owner.xr_dataset[self.variable].isel(dimensions).values + values = self._ds[self.variable].isel(dimensions).values + return values def _values(self, dtype=None): if dtype is None: diff --git a/src/earthkit/data/readers/netcdf/fieldlist.py b/src/earthkit/data/readers/netcdf/fieldlist.py index 8af58329..0de8074f 100644 --- a/src/earthkit/data/readers/netcdf/fieldlist.py +++ b/src/earthkit/data/readers/netcdf/fieldlist.py @@ -145,6 +145,7 @@ def _skip_attr(v, attr_name): if check_only: return False + return fields @@ -171,7 +172,7 @@ def has_fields(self): check_only=True, ) else: - return len(self._fields) + return len(self._fields) > 0 def _get_fields(self, ds): return get_fields_from_ds(ds, self.array_backend, field_type=self.FIELD_TYPE) diff --git a/tests/grib/test_grib_cube.py b/tests/grib/test_grib_cube.py new file mode 100644 index 00000000..9950d1ff --- /dev/null +++ b/tests/grib/test_grib_cube.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + + +from earthkit.data import from_source +from earthkit.data.testing import earthkit_examples_file + + +def test_grib_cube(): + ds = from_source("file", earthkit_examples_file("tuv_pl.grib")) + c = ds.cube("param", "level") + + assert c.user_shape == (3, 6) + assert c.field_shape == (7, 12) + + +if __name__ == "__main__": + from earthkit.data.testing import main + + main() From 9ce78283fee0c0de5054db7896880c6f3550b636 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Thu, 23 May 2024 14:59:33 +0100 Subject: [PATCH 17/22] Fix netcdf field memory usage --- src/earthkit/data/readers/netcdf/field.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/earthkit/data/readers/netcdf/field.py b/src/earthkit/data/readers/netcdf/field.py index 7789a888..d1fff820 100644 --- a/src/earthkit/data/readers/netcdf/field.py +++ b/src/earthkit/data/readers/netcdf/field.py @@ -59,7 +59,6 @@ def bounding_box(self): north=self.north, south=self.south, east=self.east, west=self.west ) - @cached_property def _grid_mapping(self): da = self.data_array if "grid_mapping" in da.attrs: From 11c1170c86398f7fac1a49873f2f6af86b03e8b8 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Fri, 24 May 2024 09:11:43 +0100 Subject: [PATCH 18/22] Add support for anemoi-datasets --- docs/requirements.txt | 2 +- src/earthkit/data/readers/grib/metadata.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 410bb95e..fabb4f4e 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -2,7 +2,7 @@ ipykernel docutils Pygments>=2.6.1 -Sphinx +Sphinx>=7.3.7 sphinx-rtd-theme nbsphinx setuptools diff --git a/src/earthkit/data/readers/grib/metadata.py b/src/earthkit/data/readers/grib/metadata.py index b68d6341..f87c76b4 100644 --- a/src/earthkit/data/readers/grib/metadata.py +++ b/src/earthkit/data/readers/grib/metadata.py @@ -27,6 +27,7 @@ def missing_is_none(x): class GribFieldGeography(Geography): def __init__(self, metadata): self.metadata = metadata + self.check_rotated_support() def latitudes(self, dtype=None): r"""Return the latitudes of the field. @@ -193,9 +194,19 @@ def rotated(self): def rotated_iterator(self): return self.metadata.get("iteratorDisableUnrotate") is not None + def check_rotated_support(self): + if self.rotated and self.metadata.get("gridType") == "reduced_rotated_gg": + from earthkit.data.utils.message import ECC_FEATURES + + if not ECC_FEATURES.versions["eccodes"] >= (2, 35, 0): + raise RuntimeError( + "gridType=rotated_reduced_gg requires ecCodes >= 2.35.0" + ) + def latitudes_unrotated(self, **kwargs): if not self.rotated: return self.latitudes(**kwargs) + if not self.rotated_iterator: from earthkit.geo.rotate import unrotate @@ -212,6 +223,7 @@ def latitudes_unrotated(self, **kwargs): def longitudes_unrotated(self, **kwargs): if not self.rotated: return self.longitudes(**kwargs) + if not self.rotated_iterator: from earthkit.geo.rotate import unrotate From 8e12525d3f5413768d7b3f7ab287f3f58d26838e Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Fri, 24 May 2024 10:17:31 +0100 Subject: [PATCH 19/22] Add support for anemoi-datasets --- src/earthkit/data/readers/grib/metadata.py | 2 +- src/earthkit/data/utils/message.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/earthkit/data/readers/grib/metadata.py b/src/earthkit/data/readers/grib/metadata.py index f87c76b4..698b049d 100644 --- a/src/earthkit/data/readers/grib/metadata.py +++ b/src/earthkit/data/readers/grib/metadata.py @@ -198,7 +198,7 @@ def check_rotated_support(self): if self.rotated and self.metadata.get("gridType") == "reduced_rotated_gg": from earthkit.data.utils.message import ECC_FEATURES - if not ECC_FEATURES.versions["eccodes"] >= (2, 35, 0): + if not ECC_FEATURES.version >= (2, 35, 0): raise RuntimeError( "gridType=rotated_reduced_gg requires ecCodes >= 2.35.0" ) diff --git a/src/earthkit/data/utils/message.py b/src/earthkit/data/utils/message.py index 7d9ada11..26a7ae34 100644 --- a/src/earthkit/data/utils/message.py +++ b/src/earthkit/data/utils/message.py @@ -60,6 +60,10 @@ def check_clone_kwargs(self, **kwargs): def versions(self): return f"ecCodes: {self._version} eccodes-python: {self._py_version}" + @property + def version(self): + return self._version + ECC_FEATURES = EccodesFeatures() From c7f2d21f6dffc03f4e32e89c532f2fb26ec831d8 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Tue, 28 May 2024 11:07:09 +0100 Subject: [PATCH 20/22] Add test for cube --- tests/grib/test_grib_cube.py | 104 +++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/tests/grib/test_grib_cube.py b/tests/grib/test_grib_cube.py index 9950d1ff..214245ea 100644 --- a/tests/grib/test_grib_cube.py +++ b/tests/grib/test_grib_cube.py @@ -9,6 +9,7 @@ # nor does it submit to any jurisdiction. # +import numpy as np from earthkit.data import from_source from earthkit.data.testing import earthkit_examples_file @@ -20,6 +21,109 @@ def test_grib_cube(): assert c.user_shape == (3, 6) assert c.field_shape == (7, 12) + assert c.extended_user_shape == (3, 6, 7, 12) + assert c.count() == 18 + + # this slice is a field + r = c[0, 0] + assert r.shape == (7, 12) + assert r.metadata(["param", "level"]) == ["t", 300] + assert r.to_numpy().shape == (7, 12) + assert np.isclose(r.to_numpy()[0, 0], 226.6531524658203) + + # this slice is a cube + r = c[0, 0:2] + assert r.user_shape == (1, 2) + assert r.field_shape == (7, 12) + assert r.extended_user_shape == (1, 2, 7, 12) + assert r.count() == 2 + assert r.to_numpy().shape == (1, 2, 7, 12) + assert np.isclose(r.to_numpy()[0, 0, 0, 0], 226.6531524658203) + + ref_meta = (["t", 300], ["t", 400]) + + for i in range(len(ref_meta)): + assert ( + r[0, i].metadata(["param", "level"]) == ref_meta[i] + ), f"{i=} ref_meta={ref_meta[i]}" + + # this slice is a cube + r = c[1:3, 0:2] + assert r.user_shape == (2, 2) + assert r.field_shape == (7, 12) + assert r.extended_user_shape == (2, 2, 7, 12) + assert r.count() == 4 + assert r.to_numpy().shape == (2, 2, 7, 12) + assert np.isclose(r.to_numpy()[0, 0, 0, 0], 10.455490112304688) + + ref_meta = (["u", 300], ["u", 400], ["v", 300], ["v", 400]) + cnt = 0 + for par in range(2): + for level in range(2): + assert ( + r[par, level].metadata(["param", "level"]) == ref_meta[cnt] + ), f"{cnt=} ref_meta={ref_meta[cnt]}" + cnt += 1 + + # this slice is a cube + r = c[1, ...] + assert r.user_shape == (1, 6) + assert r.field_shape == (7, 12) + assert r.extended_user_shape == (1, 6, 7, 12) + assert r.count() == 6 + assert r.to_numpy().shape == (1, 6, 7, 12) + assert np.isclose(r.to_numpy()[0, 0, 0, 0], 10.455490112304688) + + ref_meta = (["u", 300], ["u", 400], ["u", 500], ["u", 700], ["u", 850], ["u", 1000]) + + cnt = 0 + for par in range(1): + for level in range(6): + assert ( + r[par, level].metadata(["param", "level"]) == ref_meta[cnt] + ), f"{cnt=} ref_meta={ref_meta[cnt]}" + cnt += 1 + + +def test_grib_cubelet(): + ds = from_source("file", earthkit_examples_file("tuv_pl.grib")) + c = ds.cube("param", "level") + + reading_chunks = None + assert c.count(reading_chunks) == 18 + + ic = [] + for i in range(3): + for j in range(6): + ic.append((i, j)) + + ref = [ + 226.6531524658203, + 244.00323486328125, + 255.8430633544922, + 271.26531982421875, + 272.53916931152344, + 272.5641784667969, + 10.455490112304688, + 6.213775634765625, + 4.7400054931640625, + 4.1455230712890625, + -4.89837646484375, + -6.2868804931640625, + -3.07965087890625, + -0.050384521484375, + -0.9900970458984375, + 3.6385498046875, + 8.660964965820312, + 7.8334808349609375, + ] + + for i, cb in enumerate(c.iterate_cubelets(reading_chunks)): + assert cb.extended_icoords == ic[i] + + for i, cb in enumerate(c.iterate_cubelets(reading_chunks)): + assert cb.to_numpy().shape == (7, 12) + assert np.isclose(cb.to_numpy()[0, 0], ref[i]) if __name__ == "__main__": From 290ef68b3ea70340ed552d95fcc49d904191fb64 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Tue, 28 May 2024 13:57:25 +0100 Subject: [PATCH 21/22] Set eathkit-geo version --- environment.yml | 2 +- pyproject.toml | 1 + tests/downstream-ci-requirements.txt | 2 +- tests/environment-unit-tests.yml | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/environment.yml b/environment.yml index 13e4d158..363ba7a5 100644 --- a/environment.yml +++ b/environment.yml @@ -28,7 +28,7 @@ dependencies: - polytope-client>=0.7.4 - earthkit-meteo>=0.0.1 - eccovjson>=0.0.5 - - earthkit-geo + - earthkit-geo>=0.2.0 - tqdm>=4.63.0 - markdown - make diff --git a/pyproject.toml b/pyproject.toml index 68688f6a..8d3bbd5e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ dependencies = [ "pyyaml", "tqdm>=4.63.0", "xarray>=0.19.0", + "earthkit-geo>=0.2.0", "earthkit-meteo>=0.0.1" ] description = "A format-agnostic Python interface for geospatial data" diff --git a/tests/downstream-ci-requirements.txt b/tests/downstream-ci-requirements.txt index 94f4d868..534efe7b 100644 --- a/tests/downstream-ci-requirements.txt +++ b/tests/downstream-ci-requirements.txt @@ -21,7 +21,7 @@ torch tqdm>=4.63.0 xarray>=0.19.0 earthkit-meteo>=0.0.1 -earthkit-geo +earthkit-geo>=0.2.0 git+https://github.com/ecmwf/earthkit-data-demo-source geopandas cartopy diff --git a/tests/environment-unit-tests.yml b/tests/environment-unit-tests.yml index fc329db9..14f936c9 100644 --- a/tests/environment-unit-tests.yml +++ b/tests/environment-unit-tests.yml @@ -31,7 +31,7 @@ dependencies: - earthkit-meteo>=0.0.1 - git+https://github.com/ecmwf/earthkit-data-demo-source - eccovjson>=0.0.5 - - earthkit-geo + - earthkit-geo>=0.2.0 - tqdm>=4.63.0 - markdown - make From 5339d2350c05b92d232a9580369314709eb93168 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Tue, 28 May 2024 14:12:14 +0100 Subject: [PATCH 22/22] Disable opendap notebook in tests --- tests/documentation/test_notebooks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/documentation/test_notebooks.py b/tests/documentation/test_notebooks.py index 6b1a4301..d5bb4b72 100644 --- a/tests/documentation/test_notebooks.py +++ b/tests/documentation/test_notebooks.py @@ -33,6 +33,7 @@ "demo_source_plugin.ipynb", "ecmwf_open_data.ipynb", "shapefile.ipynb", + "netcdf_opendap.ipynb", ] if NO_PYTORCH: