diff --git a/ecml_tools/create/functions/actions/mars.py b/ecml_tools/create/functions/actions/mars.py index 525d9ee..4d7cefc 100644 --- a/ecml_tools/create/functions/actions/mars.py +++ b/ecml_tools/create/functions/actions/mars.py @@ -55,7 +55,14 @@ def _expand_mars_request(request, date): "step": s, } ) + + for pproc in ("grid", "rotation", "frame", "area", "bitmap", "resol"): + if pproc in r: + if isinstance(r[pproc], (list, tuple)): + r[pproc] = "/".join(str(x) for x in r[pproc]) + requests.append(r) + return requests @@ -68,7 +75,11 @@ def factorise_requests(dates, *requests): updates += _expand_mars_request(req, date=d) compressed = Availability(updates) - return compressed.iterate() + for r in compressed.iterate(): + for k, v in r.items(): + if isinstance(v, (list, tuple)) and len(v) == 1: + r[k] = v[0] + yield r def mars(context, dates, *requests, **kwargs): @@ -109,9 +120,7 @@ def mars(context, dates, *requests, **kwargs): """ ) - dates = yaml.safe_load( - "[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]" - ) + dates = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]") dates = to_datetime_list(dates) DEBUG = True diff --git a/ecml_tools/create/functions/steps/unrotate_winds.py b/ecml_tools/create/functions/steps/unrotate_winds.py index c22b8df..c5fcde0 100644 --- a/ecml_tools/create/functions/steps/unrotate_winds.py +++ b/ecml_tools/create/functions/steps/unrotate_winds.py @@ -47,9 +47,7 @@ def rotate_winds( new_x = np.zeros_like(x_wind) new_y = np.zeros_like(y_wind) - for i, (vx, vy, lat, lon, raw_lat, raw_lon) in enumerate( - zip(x_wind, y_wind, lats, lons, raw_lats, raw_lons) - ): + for i, (vx, vy, lat, lon, raw_lat, raw_lon) in enumerate(zip(x_wind, y_wind, lats, lons, raw_lats, raw_lons)): lonRotated = south_pole_longitude - lon lon_rotated = normalise_longitude(lonRotated, -180) lon_unrotated = raw_lon @@ -79,13 +77,13 @@ def __getattr__(self, name): return getattr(self.field, name) -def execute(context, input, x_wind, y_wind): +def execute(context, input, u, v): """ Unrotate the wind components of a GRIB file. """ result = FieldArray() - wind_params = (x_wind, y_wind) + wind_params = (u, v) wind_pairs = defaultdict(dict) for f in input: @@ -107,15 +105,15 @@ def execute(context, input, x_wind, y_wind): if len(pairs) != 2: raise ValueError("Missing wind component") - x = pairs[x_wind] - y = pairs[y_wind] + x = pairs[u] + y = pairs[v] lats, lons = x.grid_points() raw_lats, raw_longs = x.grid_points_raw() assert x.rotation == y.rotation - x_new, y_new = rotate_winds( + u_new, v_new = rotate_winds( lats, lons, raw_lats, @@ -125,8 +123,8 @@ def execute(context, input, x_wind, y_wind): *x.rotation, ) - result.append(NewDataField(x, x_new)) - result.append(NewDataField(y, y_new)) + result.append(NewDataField(x, u_new)) + result.append(NewDataField(y, v_new)) return result diff --git a/ecml_tools/data.py b/ecml_tools/data.py index af37184..94cdd0f 100644 --- a/ecml_tools/data.py +++ b/ecml_tools/data.py @@ -10,6 +10,7 @@ import logging import os import re +import textwrap import warnings from functools import cached_property, wraps from pathlib import PurePath @@ -83,6 +84,32 @@ class MissingDate(Exception): pass +class Node: + def __init__(self, dataset, kids, **kwargs): + self.dataset = dataset + self.kids = kids + self.kwargs = kwargs + + def _put(self, indent, result): + + def _spaces(indent): + return " " * indent if indent else "" + + result.append(f"{_spaces(indent)}{self.dataset.__class__.__name__}") + for k, v in self.kwargs.items(): + if isinstance(v, (list, tuple)): + v = ", ".join(str(i) for i in v) + v = textwrap.shorten(v, width=40, placeholder="...") + result.append(f"{_spaces(indent+2)}{k}: {v}") + for kid in self.kids: + kid._put(indent + 2, result) + + def __repr__(self): + result = [] + self._put(0, result) + return "\n".join(result) + + class Dataset: arguments = {} @@ -106,15 +133,15 @@ def _subset(self, **kwargs): if "select" in kwargs: select = kwargs.pop("select") - return Select(self, self._select_to_columns(select))._subset(**kwargs) + return Select(self, self._select_to_columns(select), {"select": select})._subset(**kwargs) if "drop" in kwargs: drop = kwargs.pop("drop") - return Select(self, self._drop_to_columns(drop))._subset(**kwargs) + return Select(self, self._drop_to_columns(drop), {"drop": drop})._subset(**kwargs) if "reorder" in kwargs: reorder = kwargs.pop("reorder") - return Select(self, self._reorder_to_columns(reorder))._subset(**kwargs) + return Select(self, self._reorder_to_columns(reorder), {"reoder": reorder})._subset(**kwargs) if "rename" in kwargs: rename = kwargs.pop("rename") @@ -499,6 +526,9 @@ def mutate(self): return ZarrWithMissingDates(self.z if self.was_zarr else self.path) return self + def tree(self): + return Node(self, [], path=self.path) + class ZarrWithMissingDates(Zarr): def __init__(self, path): @@ -550,6 +580,9 @@ def __getitem__(self, n): def _report_missing(self, n): raise MissingDate(f"Date {self.missing_to_dates[n]} is missing (index={n})") + def tree(self): + return Node(self, [], path=self.path, missing=sorted(self.missing)) + class Forwards(Dataset): def __init__(self, forward): @@ -768,6 +801,9 @@ def dates(self): def shape(self): return (len(self),) + self.datasets[0].shape[1:] + def tree(self): + return Node(self, [d.tree() for d in self.datasets]) + class GivenAxis(Combined): """Given a given axis, combine the datasets along that axis.""" @@ -820,7 +856,9 @@ def __getitem__(self, n): class Ensemble(GivenAxis): - pass + + def tree(self): + return Node(self, [d.tree() for d in self.datasets]) class Grids(GivenAxis): @@ -851,6 +889,9 @@ def grids(self): result.extend(d.grids) return tuple(result) + def tree(self): + return Node(self, [d.tree() for d in self.datasets], mode="concat") + class CutoutGrids(Grids): def __init__(self, datasets, axis): @@ -926,6 +967,9 @@ def grids(self): shape = self.lam.shape return (shape[-1], self.shape[-1] - shape[-1]) + def tree(self): + return Node(self, [d.tree() for d in self.datasets], mode="cutout") + class Join(Combined): """Join the datasets along the variables axis.""" @@ -996,7 +1040,7 @@ def _overlay(self): if not ok: LOG.warning("Dataset %r completely overridden.", d) - return Select(self, indices) + return Select(self, indices, {"overlay": True}) @cached_property def variables(self): @@ -1036,6 +1080,9 @@ def missing(self): result = result | d.missing return result + def tree(self): + return Node(self, [d.tree() for d in self.datasets]) + class Subset(Forwards): """Select a subset of the dates.""" @@ -1108,17 +1155,20 @@ def source(self, index): return Source(self, index, self.forward.source(index)) def __repr__(self): - return f"Subset({self.dates[0]}, {self.dates[-1]}, {self.frequency})" + return f"Subset({self.dataset},{self.dates[0]}...{self.dates[-1]}/{self.frequency})" @cached_property def missing(self): return {self.indices[i] for i in self.dataset.missing if i in self.indices} + def tree(self): + return Node(self, [self.dataset.tree()]) + class Select(Forwards): """Select a subset of the variables.""" - def __init__(self, dataset, indices): + def __init__(self, dataset, indices, title): while isinstance(dataset, Select): indices = [dataset.indices[i] for i in indices] dataset = dataset.dataset @@ -1126,6 +1176,7 @@ def __init__(self, dataset, indices): self.dataset = dataset self.indices = list(indices) assert len(self.indices) > 0 + self.title = title or {"indices": self.indices} # Forward other properties to the main dataset super().__init__(dataset) @@ -1174,6 +1225,9 @@ def metadata_specific(self, **kwargs): def source(self, index): return Source(self, index, self.dataset.source(self.indices[index])) + def tree(self): + return Node(self, [self.dataset.tree()], **self.title) + class Rename(Forwards): def __init__(self, dataset, rename): @@ -1194,6 +1248,9 @@ def name_to_index(self): def metadata_specific(self, **kwargs): return super().metadata_specific(rename=self.rename, **kwargs) + def tree(self): + return Node(self, [self.forward.tree()], rename=self.rename) + class Statistics(Forwards): def __init__(self, dataset, statistic): @@ -1210,6 +1267,9 @@ def metadata_specific(self, **kwargs): **kwargs, ) + def tree(self): + return Node(self, [self.forward.tree()]) + def _name_to_path(name, zarr_root): _, ext = os.path.splitext(name) diff --git a/tests/test_data.py b/tests/test_data.py index a27012b..06ffeff 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -40,9 +40,7 @@ def _(date, var, k=0, e=0, values=VALUES): assert 0 <= k <= 9 assert 0 <= e <= 9 - return np.array( - [d * 100 + v + k / 10.0 + w / 100.0 + e / 1000.0 for w in range(values)] - ) + return np.array([d * 100 + v + k / 10.0 + w / 100.0 + e / 1000.0 for w in range(values)]) def create_zarr( @@ -249,9 +247,7 @@ def run( expected_variables = [v for v in expected_variables] if isinstance(expected_name_to_index, str): - expected_name_to_index = { - v: i for i, v in enumerate(expected_name_to_index) - } + expected_name_to_index = {v: i for i, v in enumerate(expected_name_to_index)} assert isinstance(self.ds, expected_class) assert len(self.ds) == expected_length @@ -283,6 +279,8 @@ def run( self.indexing(self.ds) self.metadata(self.ds) + self.ds.tree() + def metadata(self, ds): metadata = ds.metadata() assert isinstance(metadata, dict) @@ -297,15 +295,9 @@ def same_stats(self, ds1, ds2, vars1, vars2=None): idx1 = ds1.name_to_index[v1] idx2 = ds2.name_to_index[v2] assert (ds1.statistics["mean"][idx1] == ds2.statistics["mean"][idx2]).all() - assert ( - ds1.statistics["stdev"][idx1] == ds2.statistics["stdev"][idx2] - ).all() - assert ( - ds1.statistics["maximum"][idx1] == ds2.statistics["maximum"][idx2] - ).all() - assert ( - ds1.statistics["minimum"][idx1] == ds2.statistics["minimum"][idx2] - ).all() + assert (ds1.statistics["stdev"][idx1] == ds2.statistics["stdev"][idx2]).all() + assert (ds1.statistics["maximum"][idx1] == ds2.statistics["maximum"][idx2]).all() + assert (ds1.statistics["minimum"][idx1] == ds2.statistics["minimum"][idx2]).all() def indexing(self, ds): t = IndexTester(ds) @@ -481,9 +473,7 @@ def test_subset_2(): def test_subset_3(): - test = DatasetTester( - "test-2021-2023-1h-o96-abcd", start=2022, end=2022, frequency=12 - ) + test = DatasetTester("test-2021-2023-1h-o96-abcd", start=2022, end=2022, frequency=12) test.run( expected_class=Subset, expected_length=365 * 2, @@ -531,9 +521,7 @@ def test_subset_5(): def test_subset_6(): - test = DatasetTester( - "test-2021-2023-1h-o96-abcd", start="2022-06-01", end="2022-08-31" - ) + test = DatasetTester("test-2021-2023-1h-o96-abcd", start="2022-06-01", end="2022-08-31") test.run( expected_class=Subset, expected_length=(30 + 31 + 31) * 24, @@ -811,9 +799,7 @@ def test_constructor_5(): ) test.same_stats(test.ds, open_dataset("test-2021-2021-6h-o96-abcd-1"), "xyd", "acd") - test.same_stats( - test.ds, open_dataset("test-2021-2021-6h-o96-abcd-2"), "abzt", "abcd" - ) + test.same_stats(test.ds, open_dataset("test-2021-2021-6h-o96-abcd-2"), "abzt", "abcd") def test_dates(): @@ -855,9 +841,7 @@ def test_slice_1(): def test_slice_2(): - test = DatasetTester( - [f"test-{year}-{year}-12h-o96-abcd" for year in range(1940, 2023)] - ) + test = DatasetTester([f"test-{year}-{year}-12h-o96-abcd" for year in range(1940, 2023)]) test.run( expected_class=Concat, expected_length=60632, @@ -874,10 +858,7 @@ def test_slice_2(): def test_slice_3(): test = DatasetTester( - [ - f"test-2020-2020-6h-o96-{vars}" - for vars in ("abcd", "efgh", "ijkl", "mnop", "qrst", "uvwx", "yz") - ] + [f"test-2020-2020-6h-o96-{vars}" for vars in ("abcd", "efgh", "ijkl", "mnop", "qrst", "uvwx", "yz")] ) test.run( expected_class=Join, @@ -894,9 +875,7 @@ def test_slice_3(): def test_slice_4(): - test = DatasetTester( - [f"test-2020-2020-1h-o96-{vars}" for vars in ("abcd", "cd", "a", "c")] - ) + test = DatasetTester([f"test-2020-2020-1h-o96-{vars}" for vars in ("abcd", "cd", "a", "c")]) test.run( expected_class=Select, expected_length=8784, @@ -972,18 +951,10 @@ def test_ensemble_2(): expected_variables="abcd", expected_name_to_index="abcd", date_to_row=lambda date: make_row( - [_(date, "a", 1, i) for i in range(10)] - + [_(date, "a", 2, 0)] - + [_(date, "a", 3, i) for i in range(5)], - [_(date, "b", 1, i) for i in range(10)] - + [_(date, "b", 2, 0)] - + [_(date, "b", 3, i) for i in range(5)], - [_(date, "c", 1, i) for i in range(10)] - + [_(date, "c", 2, 0)] - + [_(date, "c", 3, i) for i in range(5)], - [_(date, "d", 1, i) for i in range(10)] - + [_(date, "d", 2, 0)] - + [_(date, "d", 3, i) for i in range(5)], + [_(date, "a", 1, i) for i in range(10)] + [_(date, "a", 2, 0)] + [_(date, "a", 3, i) for i in range(5)], + [_(date, "b", 1, i) for i in range(10)] + [_(date, "b", 2, 0)] + [_(date, "b", 3, i) for i in range(5)], + [_(date, "c", 1, i) for i in range(10)] + [_(date, "c", 2, 0)] + [_(date, "c", 3, i) for i in range(5)], + [_(date, "d", 1, i) for i in range(10)] + [_(date, "d", 2, 0)] + [_(date, "d", 3, i) for i in range(5)], ensemble=True, ), start_date=datetime.datetime(2021, 1, 1), @@ -1008,18 +979,10 @@ def test_ensemble_3(): expected_variables="abcd", expected_name_to_index="abcd", date_to_row=lambda date: make_row( - [_(date, "a", 1, i) for i in range(10)] - + [_(date, "a", 2, 0)] - + [_(date, "a", 3, i) for i in range(5)], - [_(date, "b", 1, i) for i in range(10)] - + [_(date, "b", 2, 0)] - + [_(date, "b", 3, i) for i in range(5)], - [_(date, "c", 1, i) for i in range(10)] - + [_(date, "c", 2, 0)] - + [_(date, "c", 3, i) for i in range(5)], - [_(date, "d", 1, i) for i in range(10)] - + [_(date, "d", 2, 0)] - + [_(date, "d", 3, i) for i in range(5)], + [_(date, "a", 1, i) for i in range(10)] + [_(date, "a", 2, 0)] + [_(date, "a", 3, i) for i in range(5)], + [_(date, "b", 1, i) for i in range(10)] + [_(date, "b", 2, 0)] + [_(date, "b", 3, i) for i in range(5)], + [_(date, "c", 1, i) for i in range(10)] + [_(date, "c", 2, 0)] + [_(date, "c", 3, i) for i in range(5)], + [_(date, "d", 1, i) for i in range(10)] + [_(date, "d", 2, 0)] + [_(date, "d", 3, i) for i in range(5)], ensemble=True, ), start_date=datetime.datetime(2021, 1, 1), @@ -1070,9 +1033,7 @@ def test_grids(): ds1 = open_dataset("test-2021-2021-6h-o96-abcd-1-1") ds2 = open_dataset("test-2021-2021-6h-o96-abcd-2-1-25") - assert ( - test.ds.longitudes == np.concatenate([ds1.longitudes, ds2.longitudes]) - ).all() + assert (test.ds.longitudes == np.concatenate([ds1.longitudes, ds2.longitudes])).all() assert (test.ds.latitudes == np.concatenate([ds1.latitudes, ds2.latitudes])).all()