-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #76 from ecmwf/feature/test_date_ranges
Feature/test date ranges
- Loading branch information
Showing 35 changed files with 1,001 additions and 308 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,4 +6,8 @@ polytope.egg-info | |
.pytest_cache | ||
*.prof | ||
*.idx | ||
*.grib | ||
*.xml | ||
site | ||
.coverage | ||
*.grib |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import time | ||
|
||
import pandas as pd | ||
|
||
from polytope.datacube.backends.fdb import FDBDatacube | ||
from polytope.engine.hullslicer import HullSlicer | ||
from polytope.polytope import Polytope, Request | ||
from polytope.shapes import Box, Select | ||
|
||
|
||
class TestSlicingFDBDatacube:
    """Timing run for a single-step box extraction from an FDB-backed datacube.

    The test makes no assertions on the data; it prints the elapsed retrieval
    time and the number of leaves in the result tree.
    """

    def setup_method(self, method):
        """Build the FDB datacube, the hull slicer and the Polytope API under test."""
        # Axis options shared by the datacube and the Polytope API: an
        # octahedral mapper on "values", a date/time merge, and an integer
        # type change on "step".
        self.options = {
            "values": {
                "transformation": {
                    "mapper": {"type": "octahedral", "resolution": 1280, "axes": ["latitude", "longitude"]}
                }
            },
            "date": {"transformation": {"merge": {"with": "time", "linkers": [" ", "00"]}}},
            "step": {"transformation": {"type_change": "int"}},
        }
        # Partial request used to open the FDB datacube.
        self.config = {"class": "od", "expver": "0001", "levtype": "sfc", "step": 0}
        self.fdbdatacube = FDBDatacube(self.config, axis_options=self.options)
        self.slicer = HullSlicer()
        self.API = Polytope(datacube=self.fdbdatacube, engine=self.slicer, axis_options=self.options)

    # Testing different shapes
    # @pytest.mark.skip(reason="can't install fdb branch on CI")
    def test_fdb_datacube(self):
        """Retrieve a 10x10 latitude/longitude box for one step and report the timing."""
        request = Request(
            Select("step", [0]),
            Select("levtype", ["sfc"]),
            Select("date", [pd.Timestamp("20230625T120000")]),
            Select("domain", ["g"]),
            Select("expver", ["0001"]),
            Select("param", ["167"]),
            Select("class", ["od"]),
            Select("stream", ["oper"]),
            Select("type", ["an"]),
            Box(["latitude", "longitude"], [0, 0], [10, 10]),
        )
        # perf_counter is monotonic and high resolution, unlike the wall clock
        # returned by time.time(), so the measurement cannot be skewed by
        # system clock adjustments.
        start = time.perf_counter()
        result = self.API.retrieve(request)
        elapsed = time.perf_counter() - start
        print("ENTIRE TIME")
        print(elapsed)
        print(len(result.leaves))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
import time | ||
|
||
import pandas as pd | ||
|
||
from polytope.datacube.backends.fdb import FDBDatacube | ||
from polytope.engine.hullslicer import HullSlicer | ||
from polytope.polytope import Polytope, Request | ||
from polytope.shapes import Box, Select, Span | ||
|
||
|
||
class TestSlicingFDBDatacube:
    """Timing run for a multi-step box extraction from an FDB-backed datacube.

    The test makes no assertions on the data; it prints the elapsed retrieval
    time and the number of leaves in the result tree.
    """

    def setup_method(self, method):
        """Build the FDB datacube, the hull slicer and the Polytope API under test."""
        # Axis options shared by the datacube and the Polytope API: an
        # octahedral mapper on "values", a date/time merge, and integer type
        # changes on "step" and "levelist".
        self.options = {
            "values": {
                "transformation": {
                    "mapper": {"type": "octahedral", "resolution": 1280, "axes": ["latitude", "longitude"]}
                }
            },
            "date": {"transformation": {"merge": {"with": "time", "linkers": [" ", "00"]}}},
            "step": {"transformation": {"type_change": "int"}},
            "levelist": {"transformation": {"type_change": "int"}},
        }
        # Partial request used to open the FDB datacube; "step" is left free so
        # the request below can span several steps.
        self.config = {"class": "od", "expver": "0001", "levtype": "sfc"}
        self.fdbdatacube = FDBDatacube(self.config, axis_options=self.options)
        self.slicer = HullSlicer()
        self.API = Polytope(datacube=self.fdbdatacube, engine=self.slicer, axis_options=self.options)

    # Testing different shapes
    # @pytest.mark.skip(reason="can't install fdb branch on CI")
    def test_fdb_datacube(self):
        """Retrieve a 3x5 latitude/longitude box over a span of steps and report the timing."""
        request = Request(
            Span("step", 1, 15),
            Select("levtype", ["sfc"]),
            Select("date", [pd.Timestamp("20231102T000000")]),
            Select("domain", ["g"]),
            Select("expver", ["0001"]),
            Select("param", ["167"]),
            Select("class", ["od"]),
            Select("stream", ["oper"]),
            Select("type", ["fc"]),
            Box(["latitude", "longitude"], [0, 0], [3, 5]),
        )
        # perf_counter is monotonic and high resolution, unlike the wall clock
        # returned by time.time(), so the measurement cannot be skewed by
        # system clock adjustments.
        start = time.perf_counter()
        result = self.API.retrieve(request)
        elapsed = time.perf_counter() - start
        print("ENTIRE TIME")
        print(elapsed)
        print(len(result.leaves))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import matplotlib.pyplot as plt | ||
|
||
# Extraction times in seconds, one per run, each obtained as the difference of
# two recorded values (presumably stop/start timestamps -- confirm against the
# runs that produced them).
fdb_time = [
    stop - start
    for stop, start in (
        (7.6377081871032715, 7.558288812637329),
        (73.57192325592041, 72.99611115455627),
        (733.2706120014191, 727.7059993743896),
        (4808.3157522678375, 4770.814565420151),
    )
]

# Number of points extracted in each of the runs above, in the same order.
num_extracted_points = [1986, 19226, 191543, 1267134]

# NOTE: the 1.3M-point run also used 100 latitudes, which may explain why the
# last measurement departs from the otherwise linear trend.

plt.plot(num_extracted_points, fdb_time, marker="o")
plt.xlabel("Number of extracted points")
plt.ylabel("Polytope extraction time (in s)")
plt.show()
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from ..backends.datacube import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
from copy import deepcopy | ||
|
||
import pyfdb | ||
|
||
from .datacube import Datacube, IndexTree | ||
|
||
|
||
class FDBDatacube(Datacube):
    """Datacube backend whose axes and field values are read from an FDB through ``pyfdb``.

    The available coordinates are obtained once, at construction time, from
    ``pyfdb.FDB().axes(config)``. Values are fetched in ``get`` by walking a
    request ``IndexTree`` and issuing one ``extract`` call per group of the
    last two tree layers.
    """

    def __init__(self, config=None, axis_options=None):
        """Open the FDB and register one datacube axis per FDB coordinate.

        Args:
            config: Partial request passed to ``FDB.axes`` to list the
                available coordinates. Defaults to an empty dict.
            axis_options: Mapping of axis name to the options forwarded to
                ``_check_and_add_axes``. Defaults to an empty dict.
        """
        # ``None`` sentinels instead of ``{}`` defaults: a literal dict default
        # is created once and would be shared by every instance.
        config = {} if config is None else config
        axis_options = {} if axis_options is None else axis_options

        self.axis_options = axis_options
        self.axis_counter = 0
        self._axes = None
        treated_axes = []
        self.non_complete_axes = []
        self.complete_axes = []
        self.blocked_axes = []
        self.unwanted_axes = []
        self.fake_axes = []
        self.unwanted_path = {}

        partial_request = config
        # Find values in the level 3 FDB datacube.
        # Expected form: {axis_name: values_available, ...}
        self.fdb = pyfdb.FDB()
        self.fdb_coordinates = self.fdb.axes(partial_request).as_dict()
        # "values" is not reported by the FDB; it is added here as an empty axis.
        self.fdb_coordinates["values"] = []
        for name, values in self.fdb_coordinates.items():
            values.sort()
            options = axis_options.get(name, {})
            self._check_and_add_axes(options, name, values)
            treated_axes.append(name)
            self.complete_axes.append(name)

        # Add other options to axes which were just created above, e.g. "lat"
        # for the mapper transformations.
        for name in self._axes:
            if name not in treated_axes:
                options = axis_options.get(name, {})
                val = self._axes[name].type
                self._check_and_add_axes(options, name, val)

    def remove_unwanted_axes(self, leaf_path):
        """Remove every axis listed in ``self.unwanted_axes`` from ``leaf_path`` in place and return it.

        Raises:
            KeyError: If an unwanted axis is not present in ``leaf_path``.
        """
        for axis in self.unwanted_axes:
            leaf_path.pop(axis)
        return leaf_path

    def get(self, requests: IndexTree, leaf_path=None):
        """Walk the request tree and attach FDB values to its leaves.

        Args:
            requests: Node of the request tree to process.
            leaf_path: Key/value path accumulated from the ancestors of
                ``requests``. It is updated in place while descending. A fresh
                dict is used when omitted.
        """
        # A ``{}`` default would be mutated by ``|=`` below and so leak path
        # keys from one retrieval into the next.
        if leaf_path is None:
            leaf_path = {}

        # First when request node is root, go to its children
        if requests.axis.name == "root":
            for c in requests.children:
                self.get(c)
        else:
            key_value_path = {requests.axis.name: requests.value}
            ax = requests.axis
            (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key(
                key_value_path, leaf_path, self.unwanted_path
            )
            leaf_path |= key_value_path
            # Only the first child chain is inspected to decide whether exactly
            # two layers remain below this node.
            if len(requests.children[0].children[0].children) == 0:
                # The last two layers are handled together so that their
                # indices can be grouped into contiguous ranges.
                self.get_2nd_last_values(requests, leaf_path)
            else:
                # Otherwise iterate again over the children with the updated path
                for c in requests.children:
                    self.get(c, leaf_path)

    def get_2nd_last_values(self, requests, leaf_path=None):
        """Collect the index ranges of the last two tree layers and fetch their values.

        For every child of ``requests`` (named ``lat_child`` here), the leaves
        below it are grouped into runs of consecutive "values" indices. All
        runs are then extracted by ``give_fdb_val_to_node``.

        Args:
            requests: Node whose grandchildren are the leaves of the tree.
            leaf_path: Key/value path accumulated so far, updated in place. A
                fresh dict is used when omitted.
        """
        if leaf_path is None:
            leaf_path = {}

        lat_length = len(requests.children)
        range_lengths = [False] * lat_length
        current_start_idxs = [False] * lat_length
        fdb_node_ranges = [False] * lat_length
        for i, lat_child in enumerate(requests.children):
            lon_length = len(lat_child.children)
            # At most ``lon_length`` ranges, each at most ``lon_length`` long.
            range_length = [1] * lon_length
            current_start_idx = [None] * lon_length
            # One independent row per range. ``[[...] * n] * n`` would repeat a
            # single row object, and deepcopy preserves that sharing, so every
            # range would overwrite the same list of nodes.
            # deepcopy keeps the placeholders separate from ``IndexTree.root``.
            fdb_range_nodes = deepcopy([[IndexTree.root] * lon_length for _ in range(lon_length)])
            key_value_path = {lat_child.axis.name: lat_child.value}
            ax = lat_child.axis
            (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key(
                key_value_path, leaf_path, self.unwanted_path
            )
            leaf_path |= key_value_path
            (range_lengths[i], current_start_idxs[i], fdb_node_ranges[i]) = self.get_last_layer_before_leaf(
                lat_child, leaf_path, range_length, current_start_idx, fdb_range_nodes
            )
        self.give_fdb_val_to_node(leaf_path, range_lengths, current_start_idxs, fdb_node_ranges, lat_length)

    def get_last_layer_before_leaf(self, requests, leaf_path, range_l, current_idx, fdb_range_n):
        """Group the leaves below ``requests`` into runs of consecutive "values" indices.

        Args:
            requests: Node whose children are the leaves of the tree.
            leaf_path: Key/value path accumulated so far, updated in place.
            range_l: Per-range lengths, pre-filled with 1; updated in place.
            current_idx: Per-range start indices, pre-filled with ``None``;
                updated in place.
            fdb_range_n: Per-range lists of leaf nodes; updated in place.

        Returns:
            The tuple ``(range_l, current_idx, fdb_range_n)``.
        """
        i = 0
        for c in requests.children:
            # now c are the leaves of the initial tree
            key_value_path = {c.axis.name: c.value}
            ax = c.axis
            (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key(
                key_value_path, leaf_path, self.unwanted_path
            )
            leaf_path |= key_value_path
            last_idx = key_value_path["values"]
            if current_idx[i] is None:
                # First leaf: it opens the first range.
                current_idx[i] = last_idx
                fdb_range_n[i][range_l[i] - 1] = c
            elif last_idx == current_idx[i] + range_l[i]:
                # Leaf directly follows the current range: extend it.
                range_l[i] += 1
                fdb_range_n[i][range_l[i] - 1] = c
            else:
                # Gap in the indices: open a new range starting at this leaf.
                # NOTE(review): this second unmap looks redundant with the one
                # above; it is kept because the side effects of
                # unmap_path_key are not visible here -- confirm and remove.
                key_value_path = {c.axis.name: c.value}
                ax = c.axis
                (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key(
                    key_value_path, leaf_path, self.unwanted_path
                )
                leaf_path |= key_value_path
                i += 1
                current_idx[i] = key_value_path["values"]
                # Record the leaf as the first node of the new range; without
                # this it would never receive its extracted value.
                fdb_range_n[i][range_l[i] - 1] = c
        return (range_l, current_idx, fdb_range_n)

    def give_fdb_val_to_node(self, leaf_path, range_lengths, current_start_idx, fdb_range_nodes, lat_length):
        """Extract all collected ranges from the FDB and store the values on the leaf nodes.

        Args:
            leaf_path: Request path passed to ``find_fdb_values``.
            range_lengths: Per-latitude lists of range lengths.
            current_start_idx: Per-latitude lists of range start indices;
                ``None`` marks an unused slot.
            fdb_range_nodes: Per-latitude lists of the leaf nodes of each range.
            lat_length: Number of latitude entries to process.
        """
        (output_values, original_indices) = self.find_fdb_values(
            leaf_path, range_lengths, current_start_idx, lat_length
        )
        # Flatten the used ranges in the same order find_fdb_values visited them.
        new_fdb_range_nodes = []
        new_range_lengths = []
        for j in range(lat_length):
            for i, length in enumerate(range_lengths[j]):
                if current_start_idx[j][i] is not None:
                    new_fdb_range_nodes.append(fdb_range_nodes[j][i])
                    new_range_lengths.append(length)
        # Reorder to the sorted order in which the ranges were requested.
        sorted_fdb_range_nodes = [new_fdb_range_nodes[i] for i in original_indices]
        sorted_range_lengths = [new_range_lengths[i] for i in original_indices]
        for i, nodes in enumerate(sorted_fdb_range_nodes):
            for k in range(sorted_range_lengths[i]):
                # The three leading [0] select the single request sent; the
                # exact nesting is defined by pyfdb's extract -- TODO confirm.
                nodes[k].result = output_values[0][0][0][i][k]

    def find_fdb_values(self, path, range_lengths, current_start_idx, lat_length):
        """Build one FDB extract request for all used ranges and run it.

        Args:
            path: Request path; its "values" entry is removed in place.
            range_lengths: Per-latitude lists of range lengths.
            current_start_idx: Per-latitude lists of range start indices;
                ``None`` marks an unused slot.
            lat_length: Number of latitude entries to process.

        Returns:
            ``(output_values, original_indices)``: the result of
            ``self.fdb.extract`` and, for each sorted range, its position in
            the unsorted visiting order.

        Raises:
            KeyError: If ``path`` has no "values" entry.
            ValueError: If no range is in use (nothing to unzip).
        """
        path.pop("values")
        interm_request_ranges = []
        for i in range(lat_length):
            for j, length in enumerate(range_lengths[i]):
                start = current_start_idx[i][j]
                if start is not None:
                    interm_request_ranges.append((start, start + length))
        # Sort the ranges by start index, remembering where each came from.
        sorted_list = sorted(enumerate(interm_request_ranges), key=lambda x: x[1][0])
        original_indices, sorted_request_ranges = zip(*sorted_list)
        fdb_requests = [(path, sorted_request_ranges)]
        output_values = self.fdb.extract(fdb_requests)
        return (output_values, original_indices)

    def datacube_natural_indexes(self, axis, subarray):
        """Return the entry of ``subarray`` stored under ``axis.name``."""
        indexes = subarray[axis.name]
        return indexes

    def select(self, path, unmapped_path):
        """Return the full FDB coordinate dictionary; both arguments are ignored."""
        return self.fdb_coordinates

    def ax_vals(self, name):
        """Return the FDB coordinate values for axis ``name``, or ``None`` if unknown."""
        return self.fdb_coordinates.get(name, None)
Oops, something went wrong.