diff --git a/polytope/datacube/backends/datacube.py b/polytope/datacube/backends/datacube.py index 06d463155..6b1f7107c 100644 --- a/polytope/datacube/backends/datacube.py +++ b/polytope/datacube/backends/datacube.py @@ -1,11 +1,10 @@ import logging -import math from abc import ABC, abstractmethod from typing import Any import xarray as xr -from ...utility.combinatorics import unique, validate_axes +from ...utility.combinatorics import validate_axes from ..datacube_axis import DatacubeAxis from ..index_tree import DatacubePath, IndexTree from ..transformations.datacube_transformations import ( @@ -15,7 +14,6 @@ class Datacube(ABC): - def __init__(self, axis_options=None, datacube_options=None): if axis_options is None: self.axis_options = {} @@ -110,48 +108,12 @@ def get_indices(self, path: DatacubePath, axis, lower, upper, method=None): """ path = self.fit_path(path) indexes = axis.find_indexes(path, self) - # TODO: this could also be handled by axis/transformations? - search_ranges = axis.remap([lower, upper]) - original_search_ranges = axis.to_intervals([lower, upper]) - # Find the offsets for each interval in the requested range, which we will need later - search_ranges_offset = [] - for r in original_search_ranges: - offset = axis.offset(r) - search_ranges_offset.append(offset) - idx_between = self._look_up_datacube(search_ranges, search_ranges_offset, indexes, axis, method) - # Remove duplicates even if difference of the order of the axis tolerance - if offset is not None: - # Note that we can only do unique if not dealing with time values - idx_between = unique(idx_between) + idx_between = axis.find_indices_between(indexes, lower, upper, self, method) logging.info(f"For axis {axis.name} between {lower} and {upper}, found indices {idx_between}") return idx_between - def _look_up_datacube(self, search_ranges, search_ranges_offset, indexes, axis, method): - idx_between = [] - # TODO: maybe this can all go inside find_indices_between for the different cyclic and other transformations - for i in range(len(search_ranges)): - r = search_ranges[i] - offset = search_ranges_offset[i] - low = r[0] - up = r[1] - indexes_between = axis.find_indices_between([indexes], low, up, self, method) - # Now the indexes_between are values on the cyclic range so need to remap them to their original - # values before returning them - for j in range(len(indexes_between)): - # if we have a special indexes between range that needs additional offset, treat it here - if len(indexes_between[j]) == 0: - idx_between = idx_between - else: - for k in range(len(indexes_between[j])): - if offset is None: - indexes_between[j][k] = indexes_between[j][k] - else: - indexes_between[j][k] = round(indexes_between[j][k] + offset, int(-math.log10(axis.tol))) - idx_between.append(indexes_between[j][k]) - return idx_between - def get_mapper(self, axis): """ Get the type mapper for a subaxis of the datacube given by label diff --git a/polytope/datacube/backends/xarray.py b/polytope/datacube/backends/xarray.py index bb63b1618..459db272f 100644 --- a/polytope/datacube/backends/xarray.py +++ b/polytope/datacube/backends/xarray.py @@ -68,7 +68,13 @@ def datacube_natural_indexes(self, axis, subarray): indexes = next(iter(subarray.xindexes.values())).to_pandas_index() else: if subarray[axis.name].values.ndim == 0: - indexes = [subarray[axis.name].values] + # NOTE how we handle the two special datetime and timedelta cases to conform with numpy arrays + if np.issubdtype(subarray[axis.name].values.dtype, np.datetime64): + indexes = [subarray[axis.name].astype("datetime64[us]").values] + elif np.issubdtype(subarray[axis.name].values.dtype, np.timedelta64): + indexes = [subarray[axis.name].astype("timedelta64[us]").values] + else: + indexes = [subarray[axis.name].values.tolist()] else: indexes = subarray[axis.name].values return indexes diff --git a/polytope/datacube/datacube_axis.py b/polytope/datacube/datacube_axis.py index b18ed8d67..4314fe7e9 100644 --- a/polytope/datacube/datacube_axis.py +++ b/polytope/datacube/datacube_axis.py @@ -95,38 +95,38 @@ def _remap_val_to_axis_range(self, value): value = transformation._remap_val_to_axis_range(value, self) return value - def find_standard_indices_between(self, index_ranges, low, up, datacube, method=None): + def find_standard_indices_between(self, indexes, low, up, datacube, method=None): indexes_between_ranges = [] - for indexes in index_ranges: - if self.name in datacube.complete_axes and self.name not in datacube.transformed_axes: - # Find the range of indexes between lower and upper - # https://pandas.pydata.org/docs/reference/api/pandas.Index.searchsorted.html - # Assumes the indexes are already sorted (could sort to be sure) and monotonically increasing - if method == "surrounding" or method == "nearest": - start = indexes.searchsorted(low, "left") - end = indexes.searchsorted(up, "right") - start = max(start - 1, 0) - end = min(end + 1, len(indexes)) - indexes_between = indexes[start:end].to_list() - indexes_between_ranges.append(indexes_between) - else: - start = indexes.searchsorted(low, "left") - end = indexes.searchsorted(up, "right") - indexes_between = indexes[start:end].to_list() - indexes_between_ranges.append(indexes_between) + + if self.name in datacube.complete_axes and self.name not in datacube.transformed_axes: + # Find the range of indexes between lower and upper + # https://pandas.pydata.org/docs/reference/api/pandas.Index.searchsorted.html + # Assumes the indexes are already sorted (could sort to be sure) and monotonically increasing + if method == "surrounding" or method == "nearest": + start = indexes.searchsorted(low, "left") + end = indexes.searchsorted(up, "right") + start = max(start - 1, 0) + end = min(end + 1, len(indexes)) + indexes_between = indexes[start:end].to_list() + indexes_between_ranges.extend(indexes_between) + else: + start = indexes.searchsorted(low, "left") + end = indexes.searchsorted(up, "right") + indexes_between = indexes[start:end].to_list() + indexes_between_ranges.extend(indexes_between) + else: + if method == "surrounding" or method == "nearest": + start = bisect.bisect_left(indexes, low) + end = bisect.bisect_right(indexes, up) + start = max(start - 1, 0) + end = min(end + 1, len(indexes)) + indexes_between = indexes[start:end] + indexes_between_ranges.extend(indexes_between) else: - if method == "surrounding" or method == "nearest": - start = bisect.bisect_left(indexes, low) - end = bisect.bisect_right(indexes, up) - start = max(start - 1, 0) - end = min(end + 1, len(indexes)) - indexes_between = indexes[start:end] - indexes_between_ranges.append(indexes_between) - else: - lower_idx = bisect.bisect_left(indexes, low) - upper_idx = bisect.bisect_right(indexes, up) - indexes_between = indexes[lower_idx:upper_idx] - indexes_between_ranges.append(indexes_between) + lower_idx = bisect.bisect_left(indexes, low) + upper_idx = bisect.bisect_right(indexes, up) + indexes_between = indexes[lower_idx:upper_idx] + indexes_between_ranges.extend(indexes_between) return indexes_between_ranges def find_indices_between(self, indexes_ranges, low, up, datacube, method=None): diff --git a/polytope/datacube/transformations/datacube_cyclic/datacube_cyclic.py b/polytope/datacube/transformations/datacube_cyclic/datacube_cyclic.py index 0dad0ca29..b0dd6e1d0 100644 --- a/polytope/datacube/transformations/datacube_cyclic/datacube_cyclic.py +++ b/polytope/datacube/transformations/datacube_cyclic/datacube_cyclic.py @@ -1,6 +1,7 @@ import math from copy import deepcopy +from ....utility.combinatorics import unique from ..datacube_transformations import DatacubeAxisTransformation @@ -136,3 +137,35 @@ def to_intervals(self, range, intervals, axis): # Once we have added all the in-between ranges, we need to add the last interval intervals.append([new_up, upper]) return intervals + + def find_indices_between(self, indexes_ranges, low, up, datacube, method, indexes_between_ranges, axis): + search_ranges = self.remap([low, up], [], axis) + original_search_ranges = self.to_intervals([low, up], [], axis) + # Find the offsets for each interval in the requested range, which we will need later + search_ranges_offset = [] + for r in original_search_ranges: + offset = self.offset(r, axis, 0) + search_ranges_offset.append(offset) + idx_between = [] + for i in range(len(search_ranges)): + r = search_ranges[i] + offset = search_ranges_offset[i] + low = r[0] + up = r[1] + indexes_between = axis.find_standard_indices_between(indexes_ranges, low, up, datacube, method) + # Now the indexes_between are values on the cyclic range so need to remap them to their original + # values before returning them + # if we have a special indexes between range that needs additional offset, treat it here + if len(indexes_between) == 0: + idx_between = idx_between + else: + for k in range(len(indexes_between)): + if offset is None: + indexes_between[k] = indexes_between[k] + else: + indexes_between[k] = round(indexes_between[k] + offset, int(-math.log10(axis.tol))) + idx_between.append(indexes_between[k]) + if offset is not None: + # Note that we can only do unique if not dealing with time values + idx_between = unique(idx_between) + return idx_between diff --git a/polytope/datacube/transformations/datacube_reverse/datacube_reverse.py b/polytope/datacube/transformations/datacube_reverse/datacube_reverse.py index ec2e84da3..eed563c9e 100644 --- a/polytope/datacube/transformations/datacube_reverse/datacube_reverse.py +++ b/polytope/datacube/transformations/datacube_reverse/datacube_reverse.py @@ -29,38 +29,37 @@ def find_modified_indexes(self, indexes, path, datacube, axis): ordered_indices = indexes return ordered_indices - def find_indices_between(self, indexes_ranges, low, up, datacube, method, indexes_between_ranges, axis): + def find_indices_between(self, indexes, low, up, datacube, method, indexes_between_ranges, axis): indexes_between_ranges = [] if axis.name == self.name: - for indexes in indexes_ranges: - if axis.name in datacube.complete_axes: - # Find the range of indexes between lower and upper - # https://pandas.pydata.org/docs/reference/api/pandas.Index.searchsorted.html - # Assumes the indexes are already sorted (could sort to be sure) and monotonically - # increasing - if method == "surrounding" or method == "nearest": - start = indexes.searchsorted(low, "left") - end = indexes.searchsorted(up, "right") - start = max(start - 1, 0) - end = min(end + 1, len(indexes)) - indexes_between = indexes[start:end].to_list() - indexes_between_ranges.append(indexes_between) - else: - start = indexes.searchsorted(low, "left") - end = indexes.searchsorted(up, "right") - indexes_between = indexes[start:end].to_list() - indexes_between_ranges.append(indexes_between) + if axis.name in datacube.complete_axes: + # Find the range of indexes between lower and upper + # https://pandas.pydata.org/docs/reference/api/pandas.Index.searchsorted.html + # Assumes the indexes are already sorted (could sort to be sure) and monotonically + # increasing + if method == "surrounding" or method == "nearest": + start = indexes.searchsorted(low, "left") + end = indexes.searchsorted(up, "right") + start = max(start - 1, 0) + end = min(end + 1, len(indexes)) + indexes_between = indexes[start:end].to_list() + indexes_between_ranges.extend(indexes_between) else: - if method == "surrounding" or method == "nearest": - end_idx = bisect_left_cmp(indexes, low, cmp=lambda x, y: x > y) + 1 - start_idx = bisect_right_cmp(indexes, up, cmp=lambda x, y: x > y) - start = max(start_idx - 1, 0) - end = min(end_idx + 1, len(indexes)) - indexes_between = indexes[start:end] - indexes_between_ranges.append(indexes_between) - else: - end_idx = bisect_left_cmp(indexes, low, cmp=lambda x, y: x > y) + 1 - start_idx = bisect_right_cmp(indexes, up, cmp=lambda x, y: x > y) - indexes_between = indexes[start_idx:end_idx] - indexes_between_ranges.append(indexes_between) + start = indexes.searchsorted(low, "left") + end = indexes.searchsorted(up, "right") + indexes_between = indexes[start:end].to_list() + indexes_between_ranges.extend(indexes_between) + else: + if method == "surrounding" or method == "nearest": + end_idx = bisect_left_cmp(indexes, low, cmp=lambda x, y: x > y) + 1 + start_idx = bisect_right_cmp(indexes, up, cmp=lambda x, y: x > y) + start = max(start_idx - 1, 0) + end = min(end_idx + 1, len(indexes)) + indexes_between = indexes[start:end] + indexes_between_ranges.extend(indexes_between) + else: + end_idx = bisect_left_cmp(indexes, low, cmp=lambda x, y: x > y) + 1 + start_idx = bisect_right_cmp(indexes, up, cmp=lambda x, y: x > y) + indexes_between = indexes[start_idx:end_idx] + indexes_between_ranges.extend(indexes_between) return indexes_between_ranges diff --git a/polytope/engine/hullslicer.py b/polytope/engine/hullslicer.py index d50824c6e..8e14eeae2 100644 --- a/polytope/engine/hullslicer.py +++ b/polytope/engine/hullslicer.py @@ -167,25 +167,17 @@ def extract(self, datacube: Datacube, polytopes: List[ConvexPolytope]): if node.axis.name == datacube.axis_with_identical_structure_after: stored_val = node.value cached_node = node - # logging.info("Caching number 1") elif node.axis.name == datacube.axis_with_identical_structure_after and node.value != stored_val: repeated_sub_nodes.append(node) del node["unsliced_polytopes"] - # logging.info(f"Skipping number {node.value}") continue self._build_branch(ax, node, datacube, next_nodes) current_nodes = next_nodes - # logging.info("=== BEFORE COPYING ===") - for n in repeated_sub_nodes: - # logging.info(f"Copying children for number {n.value}") n.copy_children_from_other(cached_node) - # logging.info("=== AFTER COPYING ===") - # request.pprint() - request.merge(r) return request diff --git a/tests/test_datacube_axes_init.py b/tests/test_datacube_axes_init.py index 5612ac305..bedaf8af7 100644 --- a/tests/test_datacube_axes_init.py +++ b/tests/test_datacube_axes_init.py @@ -75,8 +75,9 @@ def test_created_axes(self): assert path == {} assert unmapped_path == {"latitude": 89.94618771566562} assert path_key == {"values": 0} - assert lat_ax.find_indices_between([[89.94618771566562, 89.87647835333229]], 89.87, 90, self.datacube, 0) == [ - [89.94618771566562, 89.87647835333229] + assert lat_ax.find_indices_between([89.94618771566562, 89.87647835333229], 89.87, 90, self.datacube, 0) == [ + 89.94618771566562, + 89.87647835333229, ] @pytest.mark.internet