Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for anemoi-datasets #383

Merged
merged 25 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
720fe7d
Add suport for anemoi-datasets
sandorkertesz May 9, 2024
7bc0772
Add suport for anemoi-datasets
sandorkertesz May 9, 2024
e80e8b9
Add suport for anemoi-datasets
sandorkertesz May 10, 2024
baefc8e
Add suport for anemoi-datasets
sandorkertesz May 10, 2024
6303c90
Merge branch 'develop' into feature/anemoi
sandorkertesz May 13, 2024
04df73d
Merge branch 'develop' into feature/anemoi
sandorkertesz May 14, 2024
a1791e8
Merge from develop
sandorkertesz May 15, 2024
2d1617c
Add suport for anemoi-datasets
sandorkertesz May 15, 2024
88f6113
Add suport for anemoi-datasets
sandorkertesz May 15, 2024
a222790
Add suport for anemoi-datasets
sandorkertesz May 15, 2024
8f8f9eb
Add suport for anemoi-datasets
sandorkertesz May 16, 2024
9b21e5c
Add suport for anemoi-datasets
sandorkertesz May 16, 2024
c94f37d
Add suport for anemoi-datasets
sandorkertesz May 16, 2024
afb5e79
Add suport for anemoi-datasets
sandorkertesz May 20, 2024
feed652
Add suport for anemoi-datasets
sandorkertesz May 20, 2024
ee21223
Add suport for anemoi-datasets
sandorkertesz May 20, 2024
40dca0d
Add suport for anemoi-datasets
sandorkertesz May 21, 2024
2b49ff7
Add test for remapping
sandorkertesz May 22, 2024
ce06623
Fix netcdf field memory usage
sandorkertesz May 23, 2024
9ce7828
Fix netcdf field memory usage
sandorkertesz May 23, 2024
11c1170
Add support for anemoi-datasets
sandorkertesz May 24, 2024
8e12525
Add support for anemoi-datasets
sandorkertesz May 24, 2024
c7f2d21
Add test for cube
sandorkertesz May 28, 2024
290ef68
Set eathkit-geo version
sandorkertesz May 28, 2024
5339d23
Disable opendap notebook in tests
sandorkertesz May 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions src/earthkit/data/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def order_by(self, *args, **kwargs):
"""Reorder the elements of the object."""
self._not_implemented()

def unique_values(self, *coords, remapping=None, progress_bar=True):
def unique_values(self, *coords, remapping=None, patches=None, progress_bar=True):
"""
Given a list of metadata attributes, such as date, param, levels,
returns the list of unique values for each attributes
Expand All @@ -109,7 +109,7 @@ def unique_values(self, *coords, remapping=None, progress_bar=True):
assert len(coords)
assert all(isinstance(k, str) for k in coords), coords

remapping = build_remapping(remapping)
remapping = build_remapping(remapping, patches)
iterable = self

if progress_bar:
Expand All @@ -118,16 +118,16 @@ def unique_values(self, *coords, remapping=None, progress_bar=True):
desc=f"Finding coords in dataset for {coords}",
)

dic = defaultdict(dict)
vals = defaultdict(dict)
for f in iterable:
metadata = remapping(f.metadata)
for k in coords:
v = metadata(k)
dic[k][v] = True
v = metadata(k, default=None)
vals[k][v] = True

dic = {k: tuple(values.keys()) for k, values in dic.items()}
vals = {k: tuple(values.keys()) for k, values in vals.items()}

return dic
return vals

# @abstractmethod
# def to_points(self, *args, **kwargs):
Expand Down
51 changes: 50 additions & 1 deletion src/earthkit/data/core/fieldlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from collections import defaultdict

from earthkit.data.core import Base
from earthkit.data.core.index import Index
from earthkit.data.core.index import Index, MaskIndex, MultiIndex
from earthkit.data.decorators import cached_method, detect_out_filename
from earthkit.data.utils.array import ensure_backend, numpy_backend
from earthkit.data.utils.metadata import metadata_argument
Expand Down Expand Up @@ -341,6 +341,31 @@ def to_latlon(self, flatten=False, dtype=None):
lon, lat = self.data(("lon", "lat"), flatten=flatten, dtype=dtype)
return dict(lat=lat, lon=lon)

def grid_points(self):
r = self.to_latlon(flatten=True)
return r["lat"], r["lon"]

def grid_points_unrotated(self):
lat = self._metadata.geography.latitudes_unrotated()
lon = self._metadata.geography.longitudes_unrotated()
return lat, lon

@property
def rotation(self):
return self._metadata.geography.rotation

@property
def resolution(self):
return self._metadata.geography.resolution()

@property
def mars_grid(self):
return self._metadata.geography.mars_grid()

@property
def mars_area(self):
return self._metadata.geography.mars_area()

@property
def shape(self):
r"""tuple: Get the shape of the field.
Expand Down Expand Up @@ -1420,3 +1445,27 @@ def to_fieldlist(self, array_backend=None, **kwargs):
def _to_array_fieldlist(self, **kwargs):
md = [f.metadata() for f in self]
return self.from_array(self.to_array(**kwargs), md)

def cube(self, *args, **kwargs):
from earthkit.data.indexing.cube import FieldCube

return FieldCube(self, *args, **kwargs)

@classmethod
def new_mask_index(self, *args, **kwargs):
return MaskFieldList(*args, **kwargs)

@classmethod
def merge(cls, sources):
assert all(isinstance(_, FieldList) for _ in sources)
return MultiFieldList(sources)


class MaskFieldList(FieldList, MaskIndex):
def __init__(self, *args, **kwargs):
MaskIndex.__init__(self, *args, **kwargs)


class MultiFieldList(FieldList, MultiIndex):
def __init__(self, *args, **kwargs):
MultiIndex.__init__(self, *args, **kwargs)
12 changes: 12 additions & 0 deletions src/earthkit/data/core/geography.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,15 @@ def gridspec(self):
:class:`~data.core.gridspec.GridSpec>`
"""
pass

@abstractmethod
def resolution(self):
pass

@abstractmethod
def mars_grid(self):
pass

@abstractmethod
def mars_area(self):
pass
55 changes: 31 additions & 24 deletions src/earthkit/data/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def build_actions(self, kwargs):
raise NotImplementedError()

def compare_elements(self, a, b):
assert callable(self.remapping), (type(self.remapping), self.remapping)
a_metadata = self.remapping(a.metadata)
b_metadata = self.remapping(b.metadata)
for k, v in self.actions.items():
Expand All @@ -120,18 +121,18 @@ def build_actions(self, kwargs):
def ascending(a, b):
if a == b:
return 0
if a > b:
if b is None or a > b:
return 1
if a < b:
if a is None or a < b:
return -1
raise ValueError(f"{a},{b}")

def descending(a, b):
if a == b:
return 0
if a > b:
if b is None or a > b:
return -1
if a < b:
if a is None or a < b:
return 1
raise ValueError(f"{a},{b}")

Expand Down Expand Up @@ -169,6 +170,9 @@ def get(self, x):
order[int(key)] = i
except ValueError:
pass
except TypeError:
print('Cannot convert "%s" to int (%s)' % (key, type(key)))
raise
try:
order[float(key)] = i
except ValueError:
Expand Down Expand Up @@ -273,13 +277,15 @@ def sel(self, *args, remapping=None, **kwargs):
Using ``remapping`` to specify the selection by a key created from two other keys
(we created key "param_level" from "param" and "levelist"):

>>> for f in ds.order_by(
>>> subset = ds.sel(
... param_level=["t850", "u1000"],
... remapping={"param_level": "{param}{levelist}"},
... ):
... )
>>> for f in subset:
... print(f)
GribField(t,850,20180801,1200,0,0)
...
GribField(u,1000,20180801,1200,0,0)
GribField(t,850,20180801,1200,0,0)
"""
kwargs = normalize_selection(*args, **kwargs)
kwargs = self._normalize_kwargs_names(**kwargs)
Expand Down Expand Up @@ -391,7 +397,7 @@ def isel(self, *args, **kwargs):

return self.sel(**kwargs)

def order_by(self, *args, remapping=None, **kwargs):
def order_by(self, *args, remapping=None, patches=None, **kwargs):
"""Changes the order of the elements in a fieldlist-like object.

Parameters
Expand Down Expand Up @@ -488,7 +494,7 @@ def order_by(self, *args, remapping=None, **kwargs):
kwargs = normalize_order_by(*args, **kwargs)
kwargs = self._normalize_kwargs_names(**kwargs)

remapping = build_remapping(remapping)
remapping = build_remapping(remapping, patches)

if not kwargs:
return self
Expand All @@ -507,38 +513,39 @@ def cmp(i, j):

def __getitem__(self, n):
if isinstance(n, slice):
return self.from_slice(n)
return self._from_slice(n)
if isinstance(n, (tuple, list)):
return self.from_multi(n)
return self._from_sequence(n)
if isinstance(n, dict):
return self.from_dict(n)
return self._from_dict(n)
else:
import numpy as np

if isinstance(n, np.ndarray):
return self.from_multi(n)
return self._from_ndarray(n)

return self._getitem(n)

def from_slice(self, s):
def _from_slice(self, s):
indices = range(len(self))[s]
return self.new_mask_index(self, indices)

def from_mask(self, lst):
def _from_mask(self, lst):
indices = [i for i, x in enumerate(lst) if x]
return self.new_mask_index(self, indices)

def from_multi(self, a):
import numpy as np
def _from_sequence(self, s):
return self.new_mask_index(self, s)

if not isinstance(a, list):
a = list(a)
def _from_ndarray(self, a):
return self._from_sequence(a.tolist())
# import numpy as np

# will raise IndexError if an index is out of bounds
n = len(self)
indices = np.arange(0, n if n > 0 else 0)
indices = indices[a].tolist()
return self.new_mask_index(self, indices)
# # will raise IndexError if an index is out of bounds
# n = len(self)
# indices = np.arange(0, n if n > 0 else 0)
# indices = indices[a].tolist()
# return self.new_mask_index(self, indices)

def from_dict(self, dic):
return self.sel(dic)
Expand Down
2 changes: 1 addition & 1 deletion src/earthkit/data/core/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def _get_custom_key(self, key, default=None, raise_on_missing=True, **kwargs):
if self._is_custom_key(key):
try:
if key == DATETIME:
return self._valid_datetime()
return self._valid_datetime().isoformat()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It feels a little strange to return a str rather than a datetime here. Could the client (anemoi-datasets) not perform the conversion to string?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This requires further discussion.

elif key == GRIDSPEC:
return self.grid_spec
except Exception as e:
Expand Down
69 changes: 59 additions & 10 deletions src/earthkit/data/core/order.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,20 @@
LOG = logging.getLogger(__name__)


class Remapping:
class Remapping(dict):
# inherit from dict to make it serialisable

def __init__(self, remapping):
self.remapping = {}
super().__init__(remapping)

self.lists = {}
for k, v in remapping.items():
m = re.split(r"\{([^}]*)\}", v)
self.remapping[k] = m
if isinstance(v, str):
v = re.split(r"\{([^}]*)\}", v)
self.lists[k] = v

def __call__(self, func):
if self.remapping is None or not self.remapping:
if not self:
return func

class CustomJoiner:
Expand All @@ -43,9 +47,12 @@ def wrapped(name, **kwargs):
return wrapped

def substitute(self, name, joiner, **kwargs):
if name in self.remapping:
if name in self.lists:
if callable(self.lists[name]):
return self.lists[name]()

lst = []
for i, bit in enumerate(self.remapping[name]):
for i, bit in enumerate(self.lists[name]):
if i % 2:
p = joiner.format_name(bit, **kwargs)
if p is not None:
Expand All @@ -58,19 +65,61 @@ def substitute(self, name, joiner, **kwargs):
return joiner.format_name(name, **kwargs)

def as_dict(self):
return self.remapping
return dict(self)


def build_remapping(mapping):
def _build_remapping(mapping):
if mapping is None:
return Remapping({})

if isinstance(mapping, dict):
if not isinstance(mapping, Remapping) and isinstance(mapping, dict):
return Remapping(mapping)

return mapping


class Patch(dict):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be nice to have a little description of what Patch is for

# inherit from dict to make it serialisable

def __init__(self, proc, name, patch):
self.proc = proc
self.name = name

if isinstance(patch, dict):
self.patch = lambda x: patch.get(x, x)
elif isinstance(patch, (int, bool, float, str)) or patch is None:
self.patch = lambda x: patch
else:
assert callable(patch)
self.patch = patch

# For JSON, we simply forward to the remapping
super().__init__(proc.as_dict())

def __call__(self, func):
next = self.proc(func)

def wrapped(name, **kwargs):
result = next(name, **kwargs)
if name == self.name:
result = self.patch(result)
return result

return wrapped
# assert False, (name, self.proc, self.name, self.patch)

def as_dict(self):
return dict(self)


def build_remapping(mapping, patches=None):
result = _build_remapping(mapping)
if patches:
for k, v in patches.items():
result = Patch(result, k, v)
return result


def normalize_order_by(*args, **kwargs):
_kwargs = {}
for a in args:
Expand Down
Loading
Loading