From 76e73b942eba22caa570d6a4121c5ae840df47a8 Mon Sep 17 00:00:00 2001 From: Mathilde Leuridan Date: Thu, 11 Jan 2024 14:25:22 +0100 Subject: [PATCH 01/11] replace |= operation with update --- polytope/datacube/backends/fdb.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/polytope/datacube/backends/fdb.py b/polytope/datacube/backends/fdb.py index cbc76893f..6cba64713 100644 --- a/polytope/datacube/backends/fdb.py +++ b/polytope/datacube/backends/fdb.py @@ -48,7 +48,7 @@ def get(self, requests: IndexTree, leaf_path={}): (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( key_value_path, leaf_path, self.unwanted_path ) - leaf_path |= key_value_path + leaf_path.update(key_value_path) if len(requests.children[0].children[0].children) == 0: # remap this last key self.get_2nd_last_values(requests, leaf_path) @@ -79,7 +79,7 @@ def get_2nd_last_values(self, requests, leaf_path={}): (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( key_value_path, leaf_path, self.unwanted_path ) - leaf_path |= key_value_path + leaf_path.update(key_value_path) (range_lengths[i], current_start_idxs[i], fdb_node_ranges[i]) = self.get_last_layer_before_leaf( lat_child, leaf_path, range_length, current_start_idx, fdb_range_nodes ) @@ -94,7 +94,7 @@ def get_last_layer_before_leaf(self, requests, leaf_path, range_l, current_idx, (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( key_value_path, leaf_path, self.unwanted_path ) - leaf_path |= key_value_path + leaf_path.update(key_value_path) last_idx = key_value_path["values"] if current_idx[i] is None: current_idx[i] = last_idx @@ -109,7 +109,7 @@ def get_last_layer_before_leaf(self, requests, leaf_path, range_l, current_idx, (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key( key_value_path, leaf_path, self.unwanted_path ) - leaf_path |= key_value_path + leaf_path.update(key_value_path) i += 1 current_start_idx = key_value_path["values"] current_idx[i] = current_start_idx From 1d98c8882eefd4ad088e512d5c42e9ef3a92f75c Mon Sep 17 00:00:00 2001 From: Mathilde Leuridan Date: Thu, 11 Jan 2024 16:30:59 +0100 Subject: [PATCH 02/11] test for polytope higher level timeseries shape --- tests/test_ecmwf_oper_data_fdb.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/tests/test_ecmwf_oper_data_fdb.py b/tests/test_ecmwf_oper_data_fdb.py index 7ec4a4659..e0d179dd5 100644 --- a/tests/test_ecmwf_oper_data_fdb.py +++ b/tests/test_ecmwf_oper_data_fdb.py @@ -3,7 +3,7 @@ from polytope.engine.hullslicer import HullSlicer from polytope.polytope import Polytope, Request -from polytope.shapes import Box, Select +from polytope.shapes import Box, Select, Union class TestSlicingFDBDatacube: @@ -16,7 +16,7 @@ def setup_method(self, method): "date": {"merge": {"with": "time", "linkers": ["T", "00"]}}, "step": {"type_change": "int"}, } - self.config = {"class": "od", "expver": "0001", "levtype": "sfc", "step": 0, "type": "fc"} + self.config = {"class": "od", "expver": "0001", "levtype": "sfc", "type": "fc", "stream": "oper"} self.fdbdatacube = FDBDatacube(self.config, axis_options=self.options) self.slicer = HullSlicer() self.API = Polytope(datacube=self.fdbdatacube, engine=self.slicer, axis_options=self.options) @@ -39,3 +39,28 @@ def test_fdb_datacube(self): result = self.API.retrieve(request) result.pprint() assert len(result.leaves) == 9 + + @pytest.mark.fdb + def test_fdb_datacube_point(self): + request = Request( + # Select("step", [0, 1]), + Union( + ["latitude", "longitude", "step"], + *[ + Box(["latitude", "longitude", "step"], lower_corner=[p[0], p[1], 0], upper_corner=[p[0], p[1], 2]) + for p in [[0.035149384216, 0.0]] + ] + ), + Select("levtype", ["sfc"]), + Select("date", [pd.Timestamp("20240103T0000")]), + Select("domain", ["g"]), + Select("expver", ["0001"]), + Select("param", ["167"]), + Select("class", ["od"]), + Select("stream", ["oper"]), + Select("type", ["fc"]), + # Point(["latitude", "longitude"], [[0.035149384216, 0.0]]), + ) + result = self.API.retrieve(request) + result.pprint() + assert len(result.leaves) == 3 From ea909c6e92e72356423ff909b7132a5d7e31cbe6 Mon Sep 17 00:00:00 2001 From: Mathilde Leuridan Date: Fri, 12 Jan 2024 09:18:41 +0100 Subject: [PATCH 03/11] add __init__ to transformations folder --- polytope/datacube/transformations/__init__.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 polytope/datacube/transformations/__init__.py diff --git a/polytope/datacube/transformations/__init__.py b/polytope/datacube/transformations/__init__.py new file mode 100644 index 000000000..cf6989be4 --- /dev/null +++ b/polytope/datacube/transformations/__init__.py @@ -0,0 +1 @@ +from ..transformations.datacube_transformations import * From 20f432890f2a13fa462ada0e8863782ea80208bf Mon Sep 17 00:00:00 2001 From: Mathilde Leuridan Date: Fri, 12 Jan 2024 09:27:31 +0100 Subject: [PATCH 04/11] fix small test issues --- tests/test_fdb_datacube.py | 3 +-- tests/test_slice_date_range_fdb.py | 1 - tests/test_slice_date_range_fdb_v2.py | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/test_fdb_datacube.py b/tests/test_fdb_datacube.py index a7c54816a..bf4452a35 100644 --- a/tests/test_fdb_datacube.py +++ b/tests/test_fdb_datacube.py @@ -20,7 +20,7 @@ def setup_method(self, method): "step": {"type_change": "int"}, "number": {"type_change": "int"}, } - self.config = {"class": "od", "expver": "0001", "levtype": "sfc"} + self.config = {"class": "od", "expver": "0001", "levtype": "sfc", "stream": "oper"} self.fdbdatacube = FDBDatacube(self.config, axis_options=self.options) self.slicer = HullSlicer() self.API = Polytope(datacube=self.fdbdatacube, engine=self.slicer, axis_options=self.options) @@ -38,7 +38,6 @@ def test_fdb_datacube(self): Select("class", ["od"]), Select("stream", ["oper"]), Select("type", ["an"]), - Select("number", [1]), Box(["latitude", "longitude"], [0, 0], [0.2, 0.2]), ) result = self.API.retrieve(request) diff --git a/tests/test_slice_date_range_fdb.py b/tests/test_slice_date_range_fdb.py index 624a77fe2..fbbdff7f2 100644 --- a/tests/test_slice_date_range_fdb.py +++ b/tests/test_slice_date_range_fdb.py @@ -27,7 +27,6 @@ def setup_method(self, method): def test_fdb_datacube(self): request = Request( Select("step", [0]), - Select("number", [1]), Select("levtype", ["sfc"]), Span("date", pd.Timestamp("20230625T120000"), pd.Timestamp("20230626T120000")), Select("domain", ["g"]), diff --git a/tests/test_slice_date_range_fdb_v2.py b/tests/test_slice_date_range_fdb_v2.py index 24ae1a9a5..ee50e75b6 100644 --- a/tests/test_slice_date_range_fdb_v2.py +++ b/tests/test_slice_date_range_fdb_v2.py @@ -16,7 +16,7 @@ def setup_method(self, method): "date": {"merge": {"with": "time", "linkers": ["T", "00"]}}, "step": {"type_change": "int"}, } - self.config = {"class": "ea", "expver": "0001", "levtype": "pl", "step": 0} + self.config = {"class": "ea", "expver": "0001", "levtype": "pl"} self.fdbdatacube = FDBDatacube(self.config, axis_options=self.options) self.slicer = HullSlicer() self.API = Polytope(datacube=self.fdbdatacube, engine=self.slicer, axis_options=self.options) From c3c6221c49a9c31f882703c70fa87e499dc21398 Mon Sep 17 00:00:00 2001 From: Mathilde Leuridan Date: Fri, 12 Jan 2024 09:29:06 +0100 Subject: [PATCH 05/11] remove {} as default argument in functions --- polytope/datacube/backends/fdb.py | 14 +++++++++++--- polytope/datacube/backends/xarray.py | 4 +++- polytope/polytope.py | 5 ++++- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/polytope/datacube/backends/fdb.py b/polytope/datacube/backends/fdb.py index 6cba64713..69eb1ed00 100644 --- a/polytope/datacube/backends/fdb.py +++ b/polytope/datacube/backends/fdb.py @@ -6,7 +6,11 @@ class FDBDatacube(Datacube): - def __init__(self, config={}, axis_options={}): + def __init__(self, config=None, axis_options=None): + if config is None: + config = {} + if axis_options is None: + axis_options = {} self.axis_options = axis_options self.axis_counter = 0 self._axes = None @@ -36,7 +40,9 @@ def __init__(self, config={}, axis_options={}): val = self._axes[name].type self._check_and_add_axes(options, name, val) - def get(self, requests: IndexTree, leaf_path={}): + def get(self, requests: IndexTree, leaf_path=None): + if leaf_path is None: + leaf_path = {} # First when request node is root, go to its children if requests.axis.name == "root": for c in requests.children: @@ -58,7 +64,9 @@ def get(self, requests: IndexTree, leaf_path={}): for c in requests.children: self.get(c, leaf_path) - def get_2nd_last_values(self, requests, leaf_path={}): + def get_2nd_last_values(self, requests, leaf_path=None): + if leaf_path is None: + leaf_path = {} # In this function, we recursively loop over the last two layers of the tree and store the indices of the # request ranges in those layers lat_length = len(requests.children) diff --git a/polytope/datacube/backends/xarray.py b/polytope/datacube/backends/xarray.py index f8ca1c2e2..ff0c8ef5c 100644 --- a/polytope/datacube/backends/xarray.py +++ b/polytope/datacube/backends/xarray.py @@ -8,7 +8,9 @@ class XArrayDatacube(Datacube): """Xarray arrays are labelled, axes can be defined as strings or integers (e.g. "time" or 0).""" - def __init__(self, dataarray: xr.DataArray, axis_options={}): + def __init__(self, dataarray: xr.DataArray, axis_options=None): + if axis_options is None: + axis_options = {} self.axis_options = axis_options self.axis_counter = 0 self._axes = None diff --git a/polytope/polytope.py b/polytope/polytope.py index f6d4a723e..361bc6f40 100644 --- a/polytope/polytope.py +++ b/polytope/polytope.py @@ -37,10 +37,13 @@ def __repr__(self): class Polytope: - def __init__(self, datacube, engine=None, axis_options={}): + def __init__(self, datacube, engine=None, axis_options=None): from .datacube import Datacube from .engine import Engine + if axis_options is None: + axis_options = {} + self.datacube = Datacube.create(datacube, axis_options) self.engine = engine if engine is not None else Engine.default() From 3678ffe1c060e6f56700833c7e75048538c39011 Mon Sep 17 00:00:00 2001 From: Mathilde Leuridan Date: Fri, 12 Jan 2024 10:02:59 +0100 Subject: [PATCH 06/11] add point with method=surrounding test --- tests/test_point_shape.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_point_shape.py b/tests/test_point_shape.py index 0bc203d61..56659227e 100644 --- a/tests/test_point_shape.py +++ b/tests/test_point_shape.py @@ -34,3 +34,8 @@ def test_point_surrounding_step(self): request = Request(Point(["step", "level"], [[2, 10]], method="surrounding"), Select("date", ["2000-01-01"])) result = self.API.retrieve(request) assert len(result.leaves) == 6 + + def test_point_surrounding_exact_step(self): + request = Request(Point(["step", "level"], [[3, 10]], method="surrounding"), Select("date", ["2000-01-01"])) + result = self.API.retrieve(request) + assert len(result.leaves) == 9 From 904828e8a8d8955b93627b31091b9575c0cbb720 Mon Sep 17 00:00:00 2001 From: Mathilde Leuridan Date: Fri, 12 Jan 2024 10:14:39 +0100 Subject: [PATCH 07/11] fix point test with method=surrounding on real data --- tests/test_ecmwf_oper_data_fdb.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/tests/test_ecmwf_oper_data_fdb.py b/tests/test_ecmwf_oper_data_fdb.py index e0d179dd5..692924f15 100644 --- a/tests/test_ecmwf_oper_data_fdb.py +++ b/tests/test_ecmwf_oper_data_fdb.py @@ -3,7 +3,7 @@ from polytope.engine.hullslicer import HullSlicer from polytope.polytope import Polytope, Request -from polytope.shapes import Box, Select, Union +from polytope.shapes import Box, Point, Select class TestSlicingFDBDatacube: @@ -43,14 +43,7 @@ def test_fdb_datacube(self): @pytest.mark.fdb def test_fdb_datacube_point(self): request = Request( - # Select("step", [0, 1]), - Union( - ["latitude", "longitude", "step"], - *[ - Box(["latitude", "longitude", "step"], lower_corner=[p[0], p[1], 0], upper_corner=[p[0], p[1], 2]) - for p in [[0.035149384216, 0.0]] - ] - ), + Select("step", [0, 1]), Select("levtype", ["sfc"]), Select("date", [pd.Timestamp("20240103T0000")]), Select("domain", ["g"]), @@ -59,8 +52,8 @@ def test_fdb_datacube_point(self): Select("class", ["od"]), Select("stream", ["oper"]), Select("type", ["fc"]), - # Point(["latitude", "longitude"], [[0.035149384216, 0.0]]), + Point(["latitude", "longitude"], [[0.035149384216, 0.0]], method="surrounding"), ) result = self.API.retrieve(request) result.pprint() - assert len(result.leaves) == 3 + assert len(result.leaves) == 12 From 643c91ac083e1e50ec9cdef5fa709e106b9b413c Mon Sep 17 00:00:00 2001 From: Mathilde Leuridan Date: Fri, 12 Jan 2024 11:25:18 +0100 Subject: [PATCH 08/11] add logging to datacube/fdb backend --- polytope/datacube/backends/datacube.py | 4 ++++ polytope/datacube/backends/fdb.py | 14 ++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/polytope/datacube/backends/datacube.py b/polytope/datacube/backends/datacube.py index efa20e93d..d3f3300aa 100644 --- a/polytope/datacube/backends/datacube.py +++ b/polytope/datacube/backends/datacube.py @@ -1,4 +1,5 @@ import importlib +import logging import math from abc import ABC, abstractmethod from typing import Any @@ -105,6 +106,9 @@ def get_indices(self, path: DatacubePath, axis, lower, upper, method=None): if offset is not None: # Note that we can only do unique if not dealing with time values idx_between = unique(idx_between) + + logging.info(f"For axis {axis.name} between {lower} and {upper}, found indices {idx_between}") + return idx_between def _look_up_datacube(self, search_ranges, search_ranges_offset, indexes, axis, method): diff --git a/polytope/datacube/backends/fdb.py b/polytope/datacube/backends/fdb.py index 69eb1ed00..0981de031 100644 --- a/polytope/datacube/backends/fdb.py +++ b/polytope/datacube/backends/fdb.py @@ -1,3 +1,4 @@ +import logging from copy import deepcopy import pygribjump as pygj @@ -11,6 +12,9 @@ def __init__(self, config=None, axis_options=None): config = {} if axis_options is None: axis_options = {} + + logging.info("Created an FDB datacube with options: " + str(axis_options)) + self.axis_options = axis_options self.axis_counter = 0 self._axes = None @@ -25,6 +29,9 @@ def __init__(self, config=None, axis_options=None): self.fdb = pygj.GribJump() self.fdb_coordinates = self.fdb.axes(partial_request) + + logging.info("Axes returned from GribJump are: " + str(self.fdb_coordinates)) + self.fdb_coordinates["values"] = [] for name, values in self.fdb_coordinates.items(): values.sort() @@ -40,11 +47,18 @@ def __init__(self, config=None, axis_options=None): val = self._axes[name].type self._check_and_add_axes(options, name, val) + logging.info("Polytope created axes for: " + str(self._axes.keys())) + def get(self, requests: IndexTree, leaf_path=None): + if leaf_path is None: leaf_path = {} + # First when request node is root, go to its children if requests.axis.name == "root": + + logging.info("Looking for data for the tree: " + str([leaf.flatten() for leaf in requests.leaves])) + for c in requests.children: self.get(c) # If request node has no children, we have a leaf so need to assign fdb values to it From 3fde0300110b25f0f85d3c69c4f32f5ff80489db Mon Sep 17 00:00:00 2001 From: Mathilde Leuridan Date: Fri, 12 Jan 2024 13:53:55 +0100 Subject: [PATCH 09/11] add logging for hullslicer engine --- polytope/engine/hullslicer.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/polytope/engine/hullslicer.py b/polytope/engine/hullslicer.py index e6c8e3eb0..29b166229 100644 --- a/polytope/engine/hullslicer.py +++ b/polytope/engine/hullslicer.py @@ -1,3 +1,4 @@ +import logging import math from copy import copy from itertools import chain @@ -72,6 +73,9 @@ def _build_sliceable_child(self, polytope, ax, node, datacube, lower, upper, nex remapped_val_interm = ax.remap([value, value])[0] remapped_val = (remapped_val_interm[0] + remapped_val_interm[1]) / 2 remapped_val = round(remapped_val, int(-math.log10(ax.tol))) + + logging.info(f"Added index {remapped_val} on axis {ax.name} to the tree") + child = node.create_child(ax, remapped_val) child["unsliced_polytopes"] = copy(node["unsliced_polytopes"]) child["unsliced_polytopes"].remove(polytope) From 12836ca0593d2b749c7c09b8099eb70493a98d91 Mon Sep 17 00:00:00 2001 From: Mathilde Leuridan Date: Fri, 12 Jan 2024 14:55:42 +0100 Subject: [PATCH 10/11] logging on mapping and merger transformations --- polytope/datacube/backends/fdb.py | 2 -- .../transformations/datacube_mappers.py | 19 +++++++++++++++++++ .../transformations/datacube_merger.py | 10 ++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/polytope/datacube/backends/fdb.py b/polytope/datacube/backends/fdb.py index 0981de031..23bdc1a3a 100644 --- a/polytope/datacube/backends/fdb.py +++ b/polytope/datacube/backends/fdb.py @@ -50,13 +50,11 @@ def __init__(self, config=None, axis_options=None): logging.info("Polytope created axes for: " + str(self._axes.keys())) def get(self, requests: IndexTree, leaf_path=None): - if leaf_path is None: leaf_path = {} # First when request node is root, go to its children if requests.axis.name == "root": - logging.info("Looking for data for the tree: " + str([leaf.flatten() for leaf in requests.leaves])) for c in requests.children: diff --git a/polytope/datacube/transformations/datacube_mappers.py b/polytope/datacube/transformations/datacube_mappers.py index 0a7a3f5e9..750b8c6fc 100644 --- a/polytope/datacube/transformations/datacube_mappers.py +++ b/polytope/datacube/transformations/datacube_mappers.py @@ -1,4 +1,5 @@ import bisect +import logging import math from copy import deepcopy from importlib import import_module @@ -126,6 +127,12 @@ def unmap(self, first_val, second_val): second_val = [i for i in self.second_axis_vals(first_val) if second_val - tol <= i <= second_val + tol][0] second_idx = self.second_axis_vals(first_val).index(second_val) final_index = self.axes_idx_to_regular_idx(first_idx, second_idx) + + logging.info( + f"Mapped the values {first_val} on axis {self._mapped_axes[0]} \ + and {second_val} on axis {self._mapped_axes[1]} to value {final_index} on axis {self._base_axis}" + ) + return final_index @@ -1627,6 +1634,12 @@ def unmap(self, first_val, second_val): second_val = [i for i in self.second_axis_vals(first_val) if second_val - tol <= i <= second_val + tol][0] second_idx = self.second_axis_vals(first_val).index(second_val) reduced_ll_index = self.axes_idx_to_reduced_ll_idx(first_idx, second_idx) + + logging.info( + f"Mapped the values {first_val} on axis {self._mapped_axes[0]} \ + and {second_val} on axis {self._mapped_axes[1]} to value {reduced_ll_index} on axis {self._base_axis}" + ) + return reduced_ll_index @@ -4494,6 +4507,12 @@ def find_second_axis_idx(self, first_val, second_val): def unmap(self, first_val, second_val): (first_idx, second_idx) = self.find_second_axis_idx(first_val, second_val) octahedral_index = self.axes_idx_to_octahedral_idx(first_idx, second_idx) + + logging.info( + f"Mapped the values {first_val} on axis {self._mapped_axes[0]} \ + and {second_val} on axis {self._mapped_axes[1]} to value {octahedral_index} on axis {self._base_axis}" + ) + return octahedral_index diff --git a/polytope/datacube/transformations/datacube_merger.py b/polytope/datacube/transformations/datacube_merger.py index d60278671..8864cabf5 100644 --- a/polytope/datacube/transformations/datacube_merger.py +++ b/polytope/datacube/transformations/datacube_merger.py @@ -1,3 +1,5 @@ +import logging + import numpy as np import pandas as pd @@ -37,6 +39,10 @@ def merged_values(self, datacube): val_to_add = val_to_add.astype("datetime64[s]") merged_values.append(val_to_add) merged_values = np.array(merged_values) + logging.info( + f"Merged values {first_ax_vals} on axis {self.name} and \ + values {second_ax_vals} on axis {second_ax_name} to values {merged_values}" + ) return merged_values def transformation_axes_final(self): @@ -56,6 +62,10 @@ def unmerge(self, merged_val): # TODO: maybe replacing like this is too specific to time/dates? first_val = str(first_val).replace("-", "") second_val = second_val.replace(":", "") + logging.info( + f"Unmerged value {merged_val} to values {first_val} on axis {self.name} \ + and {second_val} on axis {self._second_axis}" + ) return (first_val, second_val) def change_val_type(self, axis_name, values): From 3c1d417ee469185ef8de4059a6e65bc3627857a3 Mon Sep 17 00:00:00 2001 From: Mathilde Leuridan Date: Tue, 16 Jan 2024 14:57:18 +0100 Subject: [PATCH 11/11] update requirements --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 50594c4fc..66101f546 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,8 +2,8 @@ decorator==5.1.1 numpy==1.23.5 pandas==1.5.2 pypi==2.1 -requests==2.28.1 -scipy==1.9.3 +requests==2.31.0 +scipy==1.11.4 sortedcontainers==2.4.0 tripy==1.0.0 typing==3.7.4.3