Skip to content

Commit

Permalink
Merge pull request #76 from ecmwf/feature/test_date_ranges
Browse files Browse the repository at this point in the history
Feature/test date ranges
  • Loading branch information
mathleur authored Nov 23, 2023
2 parents 1b8facb + 0086059 commit f1dc3e9
Show file tree
Hide file tree
Showing 35 changed files with 1,001 additions and 308 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,8 @@ polytope.egg-info
.pytest_cache
*.prof
*.idx
*.grib
*.xml
site
.coverage
*.grib
47 changes: 47 additions & 0 deletions performance/fdb_performance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import time

import pandas as pd

from polytope.datacube.backends.fdb import FDBDatacube
from polytope.engine.hullslicer import HullSlicer
from polytope.polytope import Polytope, Request
from polytope.shapes import Box, Select


class TestSlicingFDBDatacube:
    """Time one polytope extraction (a single step, 10x10 lat/lon box) from an FDB-backed datacube.

    This is a performance script in test form: nothing is asserted, the elapsed
    time and the number of leaves in the result are printed instead.
    """

    def setup_method(self, method):
        """Build the FDB datacube, the slicer and the Polytope API used by the timed run."""
        # Axis options handed to both FDBDatacube and Polytope.
        self.options = {
            "values": {
                "transformation": {
                    "mapper": {"type": "octahedral", "resolution": 1280, "axes": ["latitude", "longitude"]}
                }
            },
            "date": {"transformation": {"merge": {"with": "time", "linkers": [" ", "00"]}}},
            "step": {"transformation": {"type_change": "int"}},
        }
        # Partial request passed to FDBDatacube as its config.
        self.config = {"class": "od", "expver": "0001", "levtype": "sfc", "step": 0}
        self.fdbdatacube = FDBDatacube(self.config, axis_options=self.options)
        self.slicer = HullSlicer()
        self.API = Polytope(datacube=self.fdbdatacube, engine=self.slicer, axis_options=self.options)

    # NOTE: the pytest skip marker used elsewhere for this kind of test
    # (reason: "can't install fdb branch on CI") is deliberately not applied here.
    def test_fdb_datacube(self):
        """Retrieve the request once and print how long the retrieval took."""
        request = Request(
            Select("step", [0]),
            Select("levtype", ["sfc"]),
            Select("date", [pd.Timestamp("20230625T120000")]),
            Select("domain", ["g"]),
            Select("expver", ["0001"]),
            Select("param", ["167"]),
            Select("class", ["od"]),
            Select("stream", ["oper"]),
            Select("type", ["an"]),
            Box(["latitude", "longitude"], [0, 0], [10, 10]),
        )
        # perf_counter is monotonic and high-resolution, unlike the adjustable
        # wall clock; stop the timer before printing so I/O is not measured.
        start = time.perf_counter()
        result = self.API.retrieve(request)
        elapsed = time.perf_counter() - start
        print("ENTIRE TIME")
        print(elapsed)
        print(len(result.leaves))
48 changes: 48 additions & 0 deletions performance/fdb_performance_3D.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import time

import pandas as pd

from polytope.datacube.backends.fdb import FDBDatacube
from polytope.engine.hullslicer import HullSlicer
from polytope.polytope import Polytope, Request
from polytope.shapes import Box, Select, Span


class TestSlicingFDBDatacube:
    """Time one polytope extraction over a span of steps and a 3x5 lat/lon box from an FDB-backed datacube.

    This is a performance script in test form: nothing is asserted, the elapsed
    time and the number of leaves in the result are printed instead.
    """

    def setup_method(self, method):
        """Build the FDB datacube, the slicer and the Polytope API used by the timed run."""
        # Axis options handed to both FDBDatacube and Polytope.
        self.options = {
            "values": {
                "transformation": {
                    "mapper": {"type": "octahedral", "resolution": 1280, "axes": ["latitude", "longitude"]}
                }
            },
            "date": {"transformation": {"merge": {"with": "time", "linkers": [" ", "00"]}}},
            "step": {"transformation": {"type_change": "int"}},
            "levelist": {"transformation": {"type_change": "int"}},
        }
        # Partial request passed to FDBDatacube as its config.
        self.config = {"class": "od", "expver": "0001", "levtype": "sfc"}
        self.fdbdatacube = FDBDatacube(self.config, axis_options=self.options)
        self.slicer = HullSlicer()
        self.API = Polytope(datacube=self.fdbdatacube, engine=self.slicer, axis_options=self.options)

    # NOTE: the pytest skip marker used elsewhere for this kind of test
    # (reason: "can't install fdb branch on CI") is deliberately not applied here.
    def test_fdb_datacube(self):
        """Retrieve the request once and print how long the retrieval took."""
        request = Request(
            Span("step", 1, 15),
            Select("levtype", ["sfc"]),
            Select("date", [pd.Timestamp("20231102T000000")]),
            Select("domain", ["g"]),
            Select("expver", ["0001"]),
            Select("param", ["167"]),
            Select("class", ["od"]),
            Select("stream", ["oper"]),
            Select("type", ["fc"]),
            Box(["latitude", "longitude"], [0, 0], [3, 5]),
        )
        # perf_counter is monotonic and high-resolution, unlike the adjustable
        # wall clock; stop the timer before printing so I/O is not measured.
        start = time.perf_counter()
        result = self.API.retrieve(request)
        elapsed = time.perf_counter() - start
        print("ENTIRE TIME")
        print(elapsed)
        print(len(result.leaves))
16 changes: 16 additions & 0 deletions performance/fdb_scalability_plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import matplotlib.pyplot as plt

# Each pair is (end reading, start reading) for one run; the difference is the
# value plotted as the extraction time for that run.
fdb_time = [
    end - start
    for end, start in (
        (7.6377081871032715, 7.558288812637329),
        (73.57192325592041, 72.99611115455627),
        (733.2706120014191, 727.7059993743896),
        (4808.3157522678375, 4770.814565420151),
    )
]
# Number of points extracted in the corresponding run above.
num_extracted_points = [1986, 19226, 191543, 1267134]

# NOTE: the ~1.3M-point run also used 100 latitudes, which may be why the
# curve is not quite linear at that end.

plt.plot(num_extracted_points, fdb_time, marker="o")
plt.xlabel("Number of extracted points")
plt.ylabel("Polytope extraction time (in s)")
plt.show()
81 changes: 0 additions & 81 deletions polytope/datacube/backends/FDB_datacube.py

This file was deleted.

1 change: 1 addition & 0 deletions polytope/datacube/backends/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from ..backends.datacube import *
3 changes: 3 additions & 0 deletions polytope/datacube/backends/datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,11 @@ def _create_axes(self, name, values, transformation_type_key, transformation_opt
)
for blocked_axis in transformation.blocked_axes():
self.blocked_axes.append(blocked_axis)
for unwanted_axis in transformation.unwanted_axes():
self.unwanted_axes.append(unwanted_axis)
for axis_name in final_axis_names:
self.complete_axes.append(axis_name)
self.fake_axes.append(axis_name)
# if axis does not yet exist, create it

# first need to change the values so that we have right type
Expand Down
167 changes: 167 additions & 0 deletions polytope/datacube/backends/fdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
from copy import deepcopy

import pyfdb

from .datacube import Datacube, IndexTree


class FDBDatacube(Datacube):
    """Datacube backend whose axes and values come from an FDB accessed through ``pyfdb``."""

    def __init__(self, config=None, axis_options=None):
        """Discover the axes available in the FDB and register them on the datacube.

        Args:
            config: Partial request passed to ``pyfdb.FDB.axes`` to list the
                available axis values. Defaults to an empty dict.
            axis_options: Mapping of axis name to the options forwarded to
                ``_check_and_add_axes``. Defaults to an empty dict.
        """
        # Use fresh dicts per instance; a ``{}`` default in the signature is
        # created once and would be shared by every instance.
        if config is None:
            config = {}
        if axis_options is None:
            axis_options = {}

        self.axis_options = axis_options
        self.axis_counter = 0
        self._axes = None
        treated_axes = []
        self.non_complete_axes = []
        self.complete_axes = []
        self.blocked_axes = []
        self.unwanted_axes = []
        self.fake_axes = []
        self.unwanted_path = {}

        partial_request = config
        # Find values in the level 3 FDB datacube, as a dict {axis_name: available_values, ...}
        self.fdb = pyfdb.FDB()
        self.fdb_coordinates = self.fdb.axes(partial_request).as_dict()
        # Register an (empty) "values" axis alongside the axes reported by the FDB.
        self.fdb_coordinates["values"] = []
        for name, values in self.fdb_coordinates.items():
            values.sort()
            options = axis_options.get(name, {})
            self._check_and_add_axes(options, name, values)
            treated_axes.append(name)
            self.complete_axes.append(name)

        # Add other options to axes which were just created above, e.g. "lat" for the mapper transformations.
        for name in self._axes:
            if name not in treated_axes:
                options = axis_options.get(name, {})
                val = self._axes[name].type
                self._check_and_add_axes(options, name, val)

    def remove_unwanted_axes(self, leaf_path):
        """Remove every axis listed in ``self.unwanted_axes`` from ``leaf_path`` in place and return it.

        Raises:
            KeyError: If an unwanted axis is not a key of ``leaf_path``.
        """
        for axis in self.unwanted_axes:
            leaf_path.pop(axis)
        return leaf_path

    def get(self, requests: IndexTree, leaf_path=None):
        """Walk the request tree and attach the values read from the FDB to its leaves.

        Args:
            requests: Node of the request tree to process.
            leaf_path: Path (axis name -> value) accumulated so far. It is
                updated in place while descending. Defaults to a new empty dict.
        """
        # A ``{}`` default would be mutated below and leak between calls.
        if leaf_path is None:
            leaf_path = {}

        # First when request node is root, go to its children
        if requests.axis.name == "root":
            for c in requests.children:
                self.get(c)
        else:
            key_value_path = {requests.axis.name: requests.value}
            ax = requests.axis
            (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key(
                key_value_path, leaf_path, self.unwanted_path
            )
            leaf_path.update(key_value_path)
            # When the grandchildren have no children, the two layers below this
            # node are the last ones and are handled together.
            if len(requests.children[0].children[0].children) == 0:
                self.get_2nd_last_values(requests, leaf_path)
            # Otherwise remap the path for this key and iterate again over children
            else:
                for c in requests.children:
                    self.get(c, leaf_path)

    def get_2nd_last_values(self, requests, leaf_path=None):
        """Collect the index ranges requested in the last two tree layers and fetch their values.

        For every child of ``requests`` (named ``lat_child`` here, presumably a
        latitude node -- TODO confirm) the contiguous index ranges of its
        children are gathered, then a single FDB extraction is issued for all
        of them.

        Args:
            requests: Node whose children and grandchildren form the last two layers.
            leaf_path: Path accumulated so far, updated in place. Defaults to
                a new empty dict.
        """
        if leaf_path is None:
            leaf_path = {}

        lat_length = len(requests.children)
        range_lengths = [False] * lat_length
        current_start_idxs = [False] * lat_length
        fdb_node_ranges = [False] * lat_length
        for i, lat_child in enumerate(requests.children):
            lon_length = len(lat_child.children)
            range_lengths[i] = [1] * lon_length
            current_start_idxs[i] = [None] * lon_length
            # One independent row per potential range. ``[[...] * n] * n`` would
            # make every row the same list object, so ranges would overwrite each other.
            fdb_node_ranges[i] = [[IndexTree.root] * lon_length for _ in range(lon_length)]
            range_length = deepcopy(range_lengths[i])
            current_start_idx = deepcopy(current_start_idxs[i])
            fdb_range_nodes = deepcopy(fdb_node_ranges[i])
            key_value_path = {lat_child.axis.name: lat_child.value}
            ax = lat_child.axis
            (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key(
                key_value_path, leaf_path, self.unwanted_path
            )
            leaf_path.update(key_value_path)
            (range_lengths[i], current_start_idxs[i], fdb_node_ranges[i]) = self.get_last_layer_before_leaf(
                lat_child, leaf_path, range_length, current_start_idx, fdb_range_nodes
            )
        self.give_fdb_val_to_node(leaf_path, range_lengths, current_start_idxs, fdb_node_ranges, lat_length)

    def get_last_layer_before_leaf(self, requests, leaf_path, range_l, current_idx, fdb_range_n):
        """Group the children of ``requests`` into runs of consecutive "values" indices.

        Args:
            requests: Node whose children are the leaves of the request tree.
            leaf_path: Path accumulated so far, updated in place.
            range_l: Per-range lengths, updated in place.
            current_idx: Per-range start index (``None`` for unused slots), updated in place.
            fdb_range_n: Per-range list of the leaf nodes in that range, updated in place.

        Returns:
            The tuple ``(range_l, current_idx, fdb_range_n)``.
        """
        i = 0
        for c in requests.children:
            # now c are the leaves of the initial tree
            key_value_path = {c.axis.name: c.value}
            ax = c.axis
            (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key(
                key_value_path, leaf_path, self.unwanted_path
            )
            leaf_path.update(key_value_path)
            last_idx = key_value_path["values"]
            if current_idx[i] is None:
                # First leaf seen: it opens the first range.
                current_idx[i] = last_idx
                fdb_range_n[i][range_l[i] - 1] = c
            elif last_idx == current_idx[i] + range_l[i]:
                # Index directly follows the current range: extend it.
                range_l[i] += 1
                fdb_range_n[i][range_l[i] - 1] = c
            else:
                # Gap in the indices: open a new range starting at this leaf.
                # NOTE(review): the key is unmapped a second time here, as in the
                # original code; kept in case unmap_path_key has side effects -- confirm.
                key_value_path = {c.axis.name: c.value}
                ax = c.axis
                (key_value_path, leaf_path, self.unwanted_path) = ax.unmap_path_key(
                    key_value_path, leaf_path, self.unwanted_path
                )
                leaf_path.update(key_value_path)
                i += 1
                current_start_idx = key_value_path["values"]
                current_idx[i] = current_start_idx
                # Record the leaf that opens the new range; without this the
                # range's first value would be written to a placeholder node.
                fdb_range_n[i][range_l[i] - 1] = c
        return (range_l, current_idx, fdb_range_n)

    def give_fdb_val_to_node(self, leaf_path, range_lengths, current_start_idx, fdb_range_nodes, lat_length):
        """Fetch the values of all collected ranges and store each one as ``result`` on its leaf node.

        Args:
            leaf_path: Path identifying the field; its "values" key is removed by ``find_fdb_values``.
            range_lengths: For each of the ``lat_length`` children, the length of every range.
            current_start_idx: Same layout, start index of every range (``None`` if unused).
            fdb_range_nodes: Same layout, the leaf nodes belonging to every range.
            lat_length: Number of entries in the outer lists.
        """
        (output_values, original_indices) = self.find_fdb_values(
            leaf_path, range_lengths, current_start_idx, lat_length
        )
        # Flatten the used ranges in the same order find_fdb_values enumerated them.
        new_fdb_range_nodes = []
        new_range_lengths = []
        for j in range(lat_length):
            for i in range(len(range_lengths[j])):
                if current_start_idx[j][i] is not None:
                    new_fdb_range_nodes.append(fdb_range_nodes[j][i])
                    new_range_lengths.append(range_lengths[j][i])
        # Reorder to match the start-index-sorted order in which ranges were requested.
        sorted_fdb_range_nodes = [new_fdb_range_nodes[i] for i in original_indices]
        sorted_range_lengths = [new_range_lengths[i] for i in original_indices]
        for i, (nodes, length) in enumerate(zip(sorted_fdb_range_nodes, sorted_range_lengths)):
            for k in range(length):
                # assumes the extraction output is indexed [0][0][0][range][offset] -- TODO confirm against pyfdb
                nodes[k].result = output_values[0][0][0][i][k]

    def find_fdb_values(self, path, range_lengths, current_start_idx, lat_length):
        """Issue one FDB extraction for all used ranges, sorted by start index.

        ``path`` is modified in place: its "values" key is removed before the
        request is built. At least one used range is expected.

        Returns:
            ``(output_values, original_indices)`` where ``output_values`` is
            whatever ``self.fdb.extract`` returns and ``original_indices[p]``
            is the pre-sort position of the p-th requested range.
        """
        path.pop("values")
        interm_request_ranges = []
        for i in range(lat_length):
            for j in range(len(range_lengths[i])):
                if current_start_idx[i][j] is not None:
                    start = current_start_idx[i][j]
                    interm_request_ranges.append((start, start + range_lengths[i][j]))
        sorted_list = sorted(enumerate(interm_request_ranges), key=lambda x: x[1][0])
        original_indices, sorted_request_ranges = zip(*sorted_list)
        fdb_requests = [(path, sorted_request_ranges)]
        output_values = self.fdb.extract(fdb_requests)
        return (output_values, original_indices)

    def datacube_natural_indexes(self, axis, subarray):
        """Return the entry of ``subarray`` stored under ``axis.name``."""
        indexes = subarray[axis.name]
        return indexes

    def select(self, path, unmapped_path):
        """Return the full FDB coordinate dict; both arguments are ignored."""
        return self.fdb_coordinates

    def ax_vals(self, name):
        """Return the FDB coordinate values for axis ``name``, or ``None`` if the axis is unknown."""
        return self.fdb_coordinates.get(name, None)
Loading

0 comments on commit f1dc3e9

Please sign in to comment.