Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/refactor cyclic transformation #109

Merged
merged 2 commits into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 2 additions & 40 deletions polytope/datacube/backends/datacube.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import logging
import math
from abc import ABC, abstractmethod
from typing import Any

import xarray as xr

from ...utility.combinatorics import unique, validate_axes
from ...utility.combinatorics import validate_axes
from ..datacube_axis import DatacubeAxis
from ..index_tree import DatacubePath, IndexTree
from ..transformations.datacube_transformations import (
Expand All @@ -15,7 +14,6 @@


class Datacube(ABC):

def __init__(self, axis_options=None, datacube_options=None):
if axis_options is None:
self.axis_options = {}
Expand Down Expand Up @@ -110,48 +108,12 @@ def get_indices(self, path: DatacubePath, axis, lower, upper, method=None):
"""
path = self.fit_path(path)
indexes = axis.find_indexes(path, self)
# TODO: this could also be handled by axis/transformations?
search_ranges = axis.remap([lower, upper])
original_search_ranges = axis.to_intervals([lower, upper])
# Find the offsets for each interval in the requested range, which we will need later
search_ranges_offset = []
for r in original_search_ranges:
offset = axis.offset(r)
search_ranges_offset.append(offset)
idx_between = self._look_up_datacube(search_ranges, search_ranges_offset, indexes, axis, method)
# Remove duplicates even if difference of the order of the axis tolerance
if offset is not None:
# Note that we can only do unique if not dealing with time values
idx_between = unique(idx_between)
idx_between = axis.find_indices_between(indexes, lower, upper, self, method)

logging.info(f"For axis {axis.name} between {lower} and {upper}, found indices {idx_between}")

return idx_between

def _look_up_datacube(self, search_ranges, search_ranges_offset, indexes, axis, method):
idx_between = []
# TODO: maybe this can all go inside find_indices_between for the different cyclic and other transformations
for i in range(len(search_ranges)):
r = search_ranges[i]
offset = search_ranges_offset[i]
low = r[0]
up = r[1]
indexes_between = axis.find_indices_between([indexes], low, up, self, method)
# Now the indexes_between are values on the cyclic range so need to remap them to their original
# values before returning them
for j in range(len(indexes_between)):
# if we have a special indexes between range that needs additional offset, treat it here
if len(indexes_between[j]) == 0:
idx_between = idx_between
else:
for k in range(len(indexes_between[j])):
if offset is None:
indexes_between[j][k] = indexes_between[j][k]
else:
indexes_between[j][k] = round(indexes_between[j][k] + offset, int(-math.log10(axis.tol)))
idx_between.append(indexes_between[j][k])
return idx_between

def get_mapper(self, axis):
"""
Get the type mapper for a subaxis of the datacube given by label
Expand Down
8 changes: 7 additions & 1 deletion polytope/datacube/backends/xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,13 @@ def datacube_natural_indexes(self, axis, subarray):
indexes = next(iter(subarray.xindexes.values())).to_pandas_index()
else:
if subarray[axis.name].values.ndim == 0:
indexes = [subarray[axis.name].values]
# NOTE how we handle the two special datetime and timedelta cases to conform with numpy arrays
if np.issubdtype(subarray[axis.name].values.dtype, np.datetime64):
indexes = [subarray[axis.name].astype("datetime64[us]").values]
elif np.issubdtype(subarray[axis.name].values.dtype, np.timedelta64):
indexes = [subarray[axis.name].astype("timedelta64[us]").values]
else:
indexes = [subarray[axis.name].values.tolist()]
else:
indexes = subarray[axis.name].values
return indexes
Expand Down
60 changes: 30 additions & 30 deletions polytope/datacube/datacube_axis.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,38 +95,38 @@ def _remap_val_to_axis_range(self, value):
value = transformation._remap_val_to_axis_range(value, self)
return value

def find_standard_indices_between(self, index_ranges, low, up, datacube, method=None):
def find_standard_indices_between(self, indexes, low, up, datacube, method=None):
indexes_between_ranges = []
for indexes in index_ranges:
if self.name in datacube.complete_axes and self.name not in datacube.transformed_axes:
# Find the range of indexes between lower and upper
# https://pandas.pydata.org/docs/reference/api/pandas.Index.searchsorted.html
# Assumes the indexes are already sorted (could sort to be sure) and monotonically increasing
if method == "surrounding" or method == "nearest":
start = indexes.searchsorted(low, "left")
end = indexes.searchsorted(up, "right")
start = max(start - 1, 0)
end = min(end + 1, len(indexes))
indexes_between = indexes[start:end].to_list()
indexes_between_ranges.append(indexes_between)
else:
start = indexes.searchsorted(low, "left")
end = indexes.searchsorted(up, "right")
indexes_between = indexes[start:end].to_list()
indexes_between_ranges.append(indexes_between)

if self.name in datacube.complete_axes and self.name not in datacube.transformed_axes:
# Find the range of indexes between lower and upper
# https://pandas.pydata.org/docs/reference/api/pandas.Index.searchsorted.html
# Assumes the indexes are already sorted (could sort to be sure) and monotonically increasing
if method == "surrounding" or method == "nearest":
start = indexes.searchsorted(low, "left")
end = indexes.searchsorted(up, "right")
start = max(start - 1, 0)
end = min(end + 1, len(indexes))
indexes_between = indexes[start:end].to_list()
indexes_between_ranges.extend(indexes_between)
else:
start = indexes.searchsorted(low, "left")
end = indexes.searchsorted(up, "right")
indexes_between = indexes[start:end].to_list()
indexes_between_ranges.extend(indexes_between)
else:
if method == "surrounding" or method == "nearest":
start = bisect.bisect_left(indexes, low)
end = bisect.bisect_right(indexes, up)
start = max(start - 1, 0)
end = min(end + 1, len(indexes))
indexes_between = indexes[start:end]
indexes_between_ranges.extend(indexes_between)
else:
if method == "surrounding" or method == "nearest":
start = bisect.bisect_left(indexes, low)
end = bisect.bisect_right(indexes, up)
start = max(start - 1, 0)
end = min(end + 1, len(indexes))
indexes_between = indexes[start:end]
indexes_between_ranges.append(indexes_between)
else:
lower_idx = bisect.bisect_left(indexes, low)
upper_idx = bisect.bisect_right(indexes, up)
indexes_between = indexes[lower_idx:upper_idx]
indexes_between_ranges.append(indexes_between)
lower_idx = bisect.bisect_left(indexes, low)
upper_idx = bisect.bisect_right(indexes, up)
indexes_between = indexes[lower_idx:upper_idx]
indexes_between_ranges.extend(indexes_between)
return indexes_between_ranges

def find_indices_between(self, indexes_ranges, low, up, datacube, method=None):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import math
from copy import deepcopy

from ....utility.combinatorics import unique
from ..datacube_transformations import DatacubeAxisTransformation


Expand Down Expand Up @@ -136,3 +137,35 @@ def to_intervals(self, range, intervals, axis):
# Once we have added all the in-between ranges, we need to add the last interval
intervals.append([new_up, upper])
return intervals

def find_indices_between(self, indexes_ranges, low, up, datacube, method, indexes_between_ranges, axis):
search_ranges = self.remap([low, up], [], axis)
original_search_ranges = self.to_intervals([low, up], [], axis)
# Find the offsets for each interval in the requested range, which we will need later
search_ranges_offset = []
for r in original_search_ranges:
offset = self.offset(r, axis, 0)
search_ranges_offset.append(offset)
idx_between = []
for i in range(len(search_ranges)):
r = search_ranges[i]
offset = search_ranges_offset[i]
low = r[0]
up = r[1]
indexes_between = axis.find_standard_indices_between(indexes_ranges, low, up, datacube, method)
# Now the indexes_between are values on the cyclic range so need to remap them to their original
# values before returning them
# if we have a special indexes between range that needs additional offset, treat it here
if len(indexes_between) == 0:
idx_between = idx_between
else:
for k in range(len(indexes_between)):
if offset is None:
indexes_between[k] = indexes_between[k]
else:
indexes_between[k] = round(indexes_between[k] + offset, int(-math.log10(axis.tol)))
idx_between.append(indexes_between[k])
if offset is not None:
# Note that we can only do unique if not dealing with time values
idx_between = unique(idx_between)
return idx_between
Original file line number Diff line number Diff line change
Expand Up @@ -29,38 +29,37 @@ def find_modified_indexes(self, indexes, path, datacube, axis):
ordered_indices = indexes
return ordered_indices

def find_indices_between(self, indexes_ranges, low, up, datacube, method, indexes_between_ranges, axis):
def find_indices_between(self, indexes, low, up, datacube, method, indexes_between_ranges, axis):
indexes_between_ranges = []
if axis.name == self.name:
for indexes in indexes_ranges:
if axis.name in datacube.complete_axes:
# Find the range of indexes between lower and upper
# https://pandas.pydata.org/docs/reference/api/pandas.Index.searchsorted.html
# Assumes the indexes are already sorted (could sort to be sure) and monotonically
# increasing
if method == "surrounding" or method == "nearest":
start = indexes.searchsorted(low, "left")
end = indexes.searchsorted(up, "right")
start = max(start - 1, 0)
end = min(end + 1, len(indexes))
indexes_between = indexes[start:end].to_list()
indexes_between_ranges.append(indexes_between)
else:
start = indexes.searchsorted(low, "left")
end = indexes.searchsorted(up, "right")
indexes_between = indexes[start:end].to_list()
indexes_between_ranges.append(indexes_between)
if axis.name in datacube.complete_axes:
# Find the range of indexes between lower and upper
# https://pandas.pydata.org/docs/reference/api/pandas.Index.searchsorted.html
# Assumes the indexes are already sorted (could sort to be sure) and monotonically
# increasing
if method == "surrounding" or method == "nearest":
start = indexes.searchsorted(low, "left")
end = indexes.searchsorted(up, "right")
start = max(start - 1, 0)
end = min(end + 1, len(indexes))
indexes_between = indexes[start:end].to_list()
indexes_between_ranges.extend(indexes_between)
else:
if method == "surrounding" or method == "nearest":
end_idx = bisect_left_cmp(indexes, low, cmp=lambda x, y: x > y) + 1
start_idx = bisect_right_cmp(indexes, up, cmp=lambda x, y: x > y)
start = max(start_idx - 1, 0)
end = min(end_idx + 1, len(indexes))
indexes_between = indexes[start:end]
indexes_between_ranges.append(indexes_between)
else:
end_idx = bisect_left_cmp(indexes, low, cmp=lambda x, y: x > y) + 1
start_idx = bisect_right_cmp(indexes, up, cmp=lambda x, y: x > y)
indexes_between = indexes[start_idx:end_idx]
indexes_between_ranges.append(indexes_between)
start = indexes.searchsorted(low, "left")
end = indexes.searchsorted(up, "right")
indexes_between = indexes[start:end].to_list()
indexes_between_ranges.extend(indexes_between)
else:
if method == "surrounding" or method == "nearest":
end_idx = bisect_left_cmp(indexes, low, cmp=lambda x, y: x > y) + 1
start_idx = bisect_right_cmp(indexes, up, cmp=lambda x, y: x > y)
start = max(start_idx - 1, 0)
end = min(end_idx + 1, len(indexes))
indexes_between = indexes[start:end]
indexes_between_ranges.extend(indexes_between)
else:
end_idx = bisect_left_cmp(indexes, low, cmp=lambda x, y: x > y) + 1
start_idx = bisect_right_cmp(indexes, up, cmp=lambda x, y: x > y)
indexes_between = indexes[start_idx:end_idx]
indexes_between_ranges.extend(indexes_between)
return indexes_between_ranges
8 changes: 0 additions & 8 deletions polytope/engine/hullslicer.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,25 +167,17 @@ def extract(self, datacube: Datacube, polytopes: List[ConvexPolytope]):
if node.axis.name == datacube.axis_with_identical_structure_after:
stored_val = node.value
cached_node = node
# logging.info("Caching number 1")
elif node.axis.name == datacube.axis_with_identical_structure_after and node.value != stored_val:
repeated_sub_nodes.append(node)
del node["unsliced_polytopes"]
# logging.info(f"Skipping number {node.value}")
continue

self._build_branch(ax, node, datacube, next_nodes)
current_nodes = next_nodes

# logging.info("=== BEFORE COPYING ===")

for n in repeated_sub_nodes:
# logging.info(f"Copying children for number {n.value}")
n.copy_children_from_other(cached_node)

# logging.info("=== AFTER COPYING ===")
# request.pprint()

request.merge(r)
return request

Expand Down
5 changes: 3 additions & 2 deletions tests/test_datacube_axes_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,9 @@ def test_created_axes(self):
assert path == {}
assert unmapped_path == {"latitude": 89.94618771566562}
assert path_key == {"values": 0}
assert lat_ax.find_indices_between([[89.94618771566562, 89.87647835333229]], 89.87, 90, self.datacube, 0) == [
[89.94618771566562, 89.87647835333229]
assert lat_ax.find_indices_between([89.94618771566562, 89.87647835333229], 89.87, 90, self.datacube, 0) == [
89.94618771566562,
89.87647835333229,
]

@pytest.mark.internet
Expand Down
Loading