From 1c8c19d01e486bf4738098003161627c6023b49f Mon Sep 17 00:00:00 2001 From: Henrik Andersson Date: Tue, 22 Aug 2023 19:43:54 +0200 Subject: [PATCH 1/6] Extract time step selection WIP --- mikeio/_time.py | 63 +++++++++++++++++++++ mikeio/dfs/_dfs.py | 136 ++++++++++++++++++++++++--------------------- 2 files changed, 135 insertions(+), 64 deletions(-) create mode 100644 mikeio/_time.py diff --git a/mikeio/_time.py b/mikeio/_time.py new file mode 100644 index 000000000..e4b6887fa --- /dev/null +++ b/mikeio/_time.py @@ -0,0 +1,63 @@ + +from datetime import datetime +from dataclasses import dataclass +from typing import List, Iterable, Optional + +import pandas as pd + + +@dataclass +class DateTimeSelector: + + index: pd.DatetimeIndex + + @property + def __len__(self): + return len(self.index) + + def isel(self, time: Optional[int | Iterable[int] | str | datetime | pd.DatetimeIndex | slice]) -> List[int]: + + indices = list(range(len(self.index))) + + if time is None: + return indices + + if isinstance(time, int): + return [indices[time]] + + if isinstance(time, (datetime, str)): + return [self.index.get_loc(time)] + + if isinstance(time, slice): + if isinstance(time.start, int) or isinstance(time.stop, int): + return indices[time] + else: + s = self.index.slice_indexer(time.start, time.stop) + return list(range(s.start, s.stop)) + + if isinstance(time, Iterable): + + # recursive call + return [self.isel(t)[0] for t in time] + + + + +if __name__ == "__main__": + + idx = pd.date_range("2000-01-01", periods=4, freq="D") + assert len(idx) == 4 + + dts = DateTimeSelector(idx) + + assert dts.isel(None) == [0,1,2,3] + assert dts.isel(0) == [0] + assert dts.isel(-1) == [3] + assert dts.isel([0,1]) == [0,1] + assert dts.isel("2000-01-02") == [1] + assert dts.isel(["2000-01-02", "2000-01-03"]) == [1,2] + assert dts.isel(idx) == [0,1,2,3] + assert dts.isel(slice(1,4)) == [1,2,3] + assert dts.isel(slice("2000-01-02", "2000-01-04")) == [1,2,3] + + \ No newline at end of file diff --git a/mikeio/dfs/_dfs.py b/mikeio/dfs/_dfs.py index 58defc959..a3147b920 100644 --- a/mikeio/dfs/_dfs.py +++ b/mikeio/dfs/_dfs.py @@ -23,6 +23,7 @@ from ..eum import EUMType, EUMUnit, ItemInfo, ItemInfoList, TimeStepUnit from ..exceptions import DataDimensionMismatch, ItemsError from ..spatial import GeometryUndefined +from .._time import DateTimeSelector def _read_item_time_step( @@ -116,75 +117,82 @@ def _valid_item_numbers( def _valid_timesteps(dfsFileInfo: DfsFileInfo, time_steps) -> Tuple[bool, List[int]]: + time_axis = dfsFileInfo.TimeAxis + single_time_selected = False - if isinstance(time_steps, int) and np.isscalar(time_steps): + if isinstance(time_steps, (int,str, datetime)): # and np.isscalar(time_steps): single_time_selected = True - n_steps_file = dfsFileInfo.TimeAxis.NumberOfTimeSteps - if time_steps is None: - return single_time_selected, list(range(n_steps_file)) - - if isinstance(time_steps, int): - time_steps = [time_steps] - - if isinstance(time_steps, str): - parts = time_steps.split(",") - if len(parts) == 1: - parts.append(parts[0]) # end=start - - if parts[0] == "": - time_steps = slice(parts[1]) # stop only - elif parts[1] == "": - time_steps = slice(parts[0], None) # start only - else: - time_steps = slice(parts[0], parts[1]) - - if isinstance(time_steps, (slice, pd.Timestamp, datetime, pd.DatetimeIndex)): - if dfsFileInfo.TimeAxis.TimeAxisType != TimeAxisType.CalendarEquidistant: - # TODO: handle non-equidistant calendar - raise ValueError( - "Only equidistant calendar files are supported for this type of time_step argument" - ) - - start_time_file = dfsFileInfo.TimeAxis.StartDateTime - time_step_file = dfsFileInfo.TimeAxis.TimeStep - freq = pd.Timedelta(seconds=time_step_file) - time = pd.date_range(start_time_file, periods=n_steps_file, freq=freq) + n_steps_file = time_axis.NumberOfTimeSteps + # if time_steps is None: + # return single_time_selected, list(range(n_steps_file)) + + # if isinstance(time_steps, int): + # time_steps = [time_steps] + + # if isinstance(time_steps, str): + # parts = time_steps.split(",") + # if len(parts) == 1: + # parts.append(parts[0]) # end=start + + # if parts[0] == "": + # time_steps = slice(parts[1]) # stop only + # elif parts[1] == "": + # time_steps = slice(parts[0], None) # start only + # else: + # time_steps = slice(parts[0], parts[1]) + + # if isinstance(time_steps, (slice, pd.Timestamp, datetime, pd.DatetimeIndex)): + # if dfsFileInfo.TimeAxis.TimeAxisType != TimeAxisType.CalendarEquidistant: + # # TODO: handle non-equidistant calendar + # raise ValueError( + # "Only equidistant calendar files are supported for this type of time_step argument" + # ) + + start_time_file = time_axis.StartDateTime + time_step_file = time_axis.TimeStep + freq = pd.Timedelta(seconds=time_step_file) + time = pd.date_range(start_time_file, periods=n_steps_file, freq=freq) - if isinstance(time_steps, slice): - if isinstance(time_steps.start, int) or isinstance(time_steps.stop, int): - time_steps = list(range(*time_steps.indices(n_steps_file))) - else: - s = time.slice_indexer(time_steps.start, time_steps.stop) - time_steps = list(range(s.start, s.stop)) - elif isinstance(time_steps, Sequence) and isinstance(time_steps[0], int): - time_steps = np.array(time_steps) - time_steps[time_steps < 0] = n_steps_file + time_steps[time_steps < 0] - time_steps = list(time_steps) - - if max(time_steps) > (n_steps_file - 1): - raise IndexError(f"Timestep cannot be larger than {n_steps_file}") - if min(time_steps) < 0: - raise IndexError(f"Timestep cannot be less than {-n_steps_file}") - elif isinstance(time_steps, Iterable): - steps = [] - for t in time_steps: - _, step = _valid_timesteps(dfsFileInfo, t) - steps.append(step[0]) - single_time_selected = len(steps) == 1 - time_steps = steps - - elif isinstance(time_steps, (pd.Timestamp, datetime)): - s = time.slice_indexer(time_steps, time_steps) - time_steps = list(range(s.start, s.stop)) - #elif isinstance(time_steps, pd.DatetimeIndex): - # time_steps = list(time.get_indexer(time_steps)) + - else: - raise TypeError(f"Indexing is not possible with {type(time_steps)}") - if len(time_steps) == 1: - single_time_selected = True - return single_time_selected, time_steps + dts = DateTimeSelector(time) + return single_time_selected, dts.isel(time_steps) + + # if isinstance(time_steps, slice): + # if isinstance(time_steps.start, int) or isinstance(time_steps.stop, int): + # time_steps = list(range(*time_steps.indices(n_steps_file))) + # else: + # s = time.slice_indexer(time_steps.start, time_steps.stop) + # time_steps = list(range(s.start, s.stop)) + # elif isinstance(time_steps, Sequence) and isinstance(time_steps[0], int): + # time_steps = np.array(time_steps) + # time_steps[time_steps < 0] = n_steps_file + time_steps[time_steps < 0] + # time_steps = list(time_steps) + + # if max(time_steps) > (n_steps_file - 1): + # raise IndexError(f"Timestep cannot be larger than {n_steps_file}") + # if min(time_steps) < 0: + # raise IndexError(f"Timestep cannot be less than {-n_steps_file}") + # elif isinstance(time_steps, Iterable): + # steps = [] + # for t in time_steps: + # _, step = _valid_timesteps(dfsFileInfo, t) + # steps.append(step[0]) + # single_time_selected = len(steps) == 1 + # time_steps = steps + + # elif isinstance(time_steps, (pd.Timestamp, datetime)): + # s = time.slice_indexer(time_steps, time_steps) + # time_steps = list(range(s.start, s.stop)) + # #elif isinstance(time_steps, pd.DatetimeIndex): + # # time_steps = list(time.get_indexer(time_steps)) + + # else: + # raise TypeError(f"Indexing is not possible with {type(time_steps)}") + # if len(time_steps) == 1: + # single_time_selected = True + # return single_time_selected, time_steps def _item_numbers_by_name(dfsItemInfo, item_names: List[str], ignore_first: bool=False) -> List[int]: From 9ef3a962c7d30dc436b2eb038b3f0fb27a46f1c7 Mon Sep 17 00:00:00 2001 From: Henrik Andersson Date: Wed, 23 Aug 2023 10:15:15 +0200 Subject: [PATCH 2/6] Handle non-equidistant time --- mikeio/_time.py | 12 +++++++++--- mikeio/dfs/_dfs.py | 37 ++++++++++++++++++++++++++----------- tests/test_consistency.py | 5 +++-- 3 files changed, 38 insertions(+), 16 deletions(-) diff --git a/mikeio/_time.py b/mikeio/_time.py index e4b6887fa..803d5142e 100644 --- a/mikeio/_time.py +++ b/mikeio/_time.py @@ -1,4 +1,4 @@ - +from __future__ import annotations from datetime import datetime from dataclasses import dataclass from typing import List, Iterable, Optional @@ -26,8 +26,12 @@ def isel(self, time: Optional[int | Iterable[int] | str | datetime | pd.Datetime return [indices[time]] if isinstance(time, (datetime, str)): - return [self.index.get_loc(time)] - + loc = self.index.get_loc(time) + if isinstance(loc, int): + return [loc] + elif isinstance(loc, slice): + return list(range(loc.start, loc.stop)) + if isinstance(time, slice): if isinstance(time.start, int) or isinstance(time.stop, int): return indices[time] @@ -39,6 +43,8 @@ def isel(self, time: Optional[int | Iterable[int] | str | datetime | pd.Datetime # recursive call return [self.isel(t)[0] for t in time] + + return indices diff --git a/mikeio/dfs/_dfs.py b/mikeio/dfs/_dfs.py index a3147b920..f2e298352 100644 --- a/mikeio/dfs/_dfs.py +++ b/mikeio/dfs/_dfs.py @@ -1,12 +1,11 @@ import warnings from abc import abstractmethod -from datetime import datetime, timedelta -from typing import Iterable, List, Optional, Tuple, Union, Sequence +from datetime import datetime +from typing import List, Optional, Tuple, Union, Sequence import numpy as np import pandas as pd from tqdm import tqdm, trange -from copy import deepcopy from mikecore.DfsFactory import DfsFactory from mikecore.DfsFile import ( DfsDynamicItemInfo, @@ -120,7 +119,7 @@ def _valid_timesteps(dfsFileInfo: DfsFileInfo, time_steps) -> Tuple[bool, List[i time_axis = dfsFileInfo.TimeAxis single_time_selected = False - if isinstance(time_steps, (int,str, datetime)): # and np.isscalar(time_steps): + if isinstance(time_steps, (int, datetime)): single_time_selected = True n_steps_file = time_axis.NumberOfTimeSteps @@ -149,15 +148,31 @@ def _valid_timesteps(dfsFileInfo: DfsFileInfo, time_steps) -> Tuple[bool, List[i # "Only equidistant calendar files are supported for this type of time_step argument" # ) - start_time_file = time_axis.StartDateTime - time_step_file = time_axis.TimeStep - freq = pd.Timedelta(seconds=time_step_file) - time = pd.date_range(start_time_file, periods=n_steps_file, freq=freq) + if time_axis.TimeAxisType != TimeAxisType.CalendarEquidistant: + start_time_file = datetime(1970, 1, 1) # TODO is this the proper epoch, should this magic number be somewhere else? + else: + start_time_file = time_axis.StartDateTime - + if time_axis.TimeAxisType == TimeAxisType.CalendarEquidistant: + time_step_file = time_axis.TimeStep + freq = pd.Timedelta(seconds=time_step_file) + time = pd.date_range(start_time_file, periods=n_steps_file, freq=freq) + elif time_axis.TimeAxisType == TimeAxisType.CalendarNonEquidistant: + idx = list(range(n_steps_file)) + + if isinstance(time_steps, int): + return True, [idx[time_steps]] + return single_time_selected, idx dts = DateTimeSelector(time) - return single_time_selected, dts.isel(time_steps) + + idx = dts.isel(time_steps) + + if isinstance(time_steps, str): + if len(idx) == 1: + single_time_selected = True + + return single_time_selected, idx # if isinstance(time_steps, slice): # if isinstance(time_steps.start, int) or isinstance(time_steps.stop, int): @@ -395,7 +410,7 @@ def _read_header(self): }: self._start_time = dfs.FileInfo.TimeAxis.StartDateTime else: # relative time axis - self._start_time = datetime(1970, 1, 1) + self._start_time = datetime(1970, 1, 1) # TODO is this the proper epoch, should this magic number be somewhere else? if hasattr(dfs.FileInfo.TimeAxis, "TimeStep"): self._timestep_in_seconds = ( dfs.FileInfo.TimeAxis.TimeStep diff --git a/tests/test_consistency.py b/tests/test_consistency.py index 528b1a332..60ae83cbc 100644 --- a/tests/test_consistency.py +++ b/tests/test_consistency.py @@ -422,12 +422,13 @@ def test_read_dfs_time_slice_str(): assert dsr.shape == dsgetitem.shape -def test_read_dfs_time_selection_str_comma(): +def test_read_dfs_time_selection_str_slice(): extensions = ["dfs0", "dfs2", "dfs1", "dfs0"] for ext in extensions: filename = f"tests/testdata/consistency/oresundHD.{ext}" - time = "2018-03-08,2018-03-10" + + time = slice("2018-03-08","2018-03-10") ds = mikeio.read(filename=filename) dssel = ds.sel(time=time) assert dssel.n_timesteps == 3 From e894920428f2e2faca92af9e930778dbc14c4841 Mon Sep 17 00:00:00 2001 From: Henrik Andersson Date: Wed, 23 Aug 2023 10:33:52 +0200 Subject: [PATCH 3/6] Unit test --- mikeio/_time.py | 52 +++++++++++++++------- mikeio/dfs/_dfs.py | 107 +++++++++++++-------------------------------- tests/test_time.py | 20 +++++++++ 3 files changed, 86 insertions(+), 93 deletions(-) create mode 100644 tests/test_time.py diff --git a/mikeio/_time.py b/mikeio/_time.py index 803d5142e..33695bbe3 100644 --- a/mikeio/_time.py +++ b/mikeio/_time.py @@ -8,46 +8,64 @@ @dataclass class DateTimeSelector: + """Helper class for selecting time steps from a pandas DatetimeIndex""" index: pd.DatetimeIndex - @property def __len__(self): return len(self.index) - def isel(self, time: Optional[int | Iterable[int] | str | datetime | pd.DatetimeIndex | slice]) -> List[int]: + def isel(self, x: Optional[int | Iterable[int] | str | datetime | pd.DatetimeIndex | slice]) -> List[int]: + """Select time steps from a pandas DatetimeIndex + + Parameters + ---------- + x : int, Iterable[int], str, datetime, pd.DatetimeIndex, slice + Time steps to select, negative indices are supported + + Returns + ------- + List[int] + List of indices in the range (0, len(index) + Examples + -------- + >>> idx = pd.date_range("2000-01-01", periods=4, freq="D") + >>> dts = DateTimeSelector(idx) + >>> dts.isel(None) + [0, 1, 2, 3] + >>> dts.isel(0) + [0] + >>> dts.isel(-1) + [3] + """ indices = list(range(len(self.index))) - if time is None: + if x is None: return indices - if isinstance(time, int): - return [indices[time]] + if isinstance(x, int): + return [indices[x]] - if isinstance(time, (datetime, str)): - loc = self.index.get_loc(time) + if isinstance(x, (datetime, str)): + loc = self.index.get_loc(x) if isinstance(loc, int): return [loc] elif isinstance(loc, slice): return list(range(loc.start, loc.stop)) - if isinstance(time, slice): - if isinstance(time.start, int) or isinstance(time.stop, int): - return indices[time] + if isinstance(x, slice): + if isinstance(x.start, int) or isinstance(x.stop, int): + return indices[x] else: - s = self.index.slice_indexer(time.start, time.stop) + s = self.index.slice_indexer(x.start, x.stop) return list(range(s.start, s.stop)) - if isinstance(time, Iterable): - - # recursive call - return [self.isel(t)[0] for t in time] + if isinstance(x, Iterable): + return [self.isel(t)[0] for t in x] return indices - - if __name__ == "__main__": diff --git a/mikeio/dfs/_dfs.py b/mikeio/dfs/_dfs.py index f3d34e07c..8e3d804bc 100644 --- a/mikeio/dfs/_dfs.py +++ b/mikeio/dfs/_dfs.py @@ -78,10 +78,10 @@ def _valid_item_numbers( n_items_file = len(dfsItemInfo) - start_idx if items is None: return list(range(n_items_file)) - + # Handling scalar and sequences is a bit tricky - - item_numbers : List[int] = [] + + item_numbers: List[int] = [] # check if items is a scalar (int or str) if isinstance(items, (int, str)): @@ -90,13 +90,13 @@ def _valid_item_numbers( dfsItemInfo=dfsItemInfo, search=items, start_idx=start_idx ) elif isinstance(items, str): - item_number = _item_numbers_by_name(dfsItemInfo, [items], ignore_first)[0] + item_number = _item_numbers_by_name(dfsItemInfo, [items], ignore_first)[0] return [item_number] elif isinstance(items, int): if (items < 0) or (items >= n_items_file): raise ItemsError(n_items_file) return [items] - + assert isinstance(items, Sequence) for item in items: if isinstance(item, str): @@ -123,43 +123,23 @@ def _valid_timesteps(dfsFileInfo: DfsFileInfo, time_steps) -> Tuple[bool, List[i if isinstance(time_steps, (int, datetime)): single_time_selected = True - n_steps_file = time_axis.NumberOfTimeSteps - # if time_steps is None: - # return single_time_selected, list(range(n_steps_file)) - - # if isinstance(time_steps, int): - # time_steps = [time_steps] - - # if isinstance(time_steps, str): - # parts = time_steps.split(",") - # if len(parts) == 1: - # parts.append(parts[0]) # end=start - - # if parts[0] == "": - # time_steps = slice(parts[1]) # stop only - # elif parts[1] == "": - # time_steps = slice(parts[0], None) # start only - # else: - # time_steps = slice(parts[0], parts[1]) - - # if isinstance(time_steps, (slice, pd.Timestamp, datetime, pd.DatetimeIndex)): - # if dfsFileInfo.TimeAxis.TimeAxisType != TimeAxisType.CalendarEquidistant: - # # TODO: handle non-equidistant calendar - # raise ValueError( - # "Only equidistant calendar files are supported for this type of time_step argument" - # ) + nt = time_axis.NumberOfTimeSteps if time_axis.TimeAxisType != TimeAxisType.CalendarEquidistant: - start_time_file = datetime(1970, 1, 1) # TODO is this the proper epoch, should this magic number be somewhere else? + # TODO is this the proper epoch, should this magic number be somewhere else? + start_time_file = datetime(1970, 1, 1) else: start_time_file = time_axis.StartDateTime - if time_axis.TimeAxisType == TimeAxisType.CalendarEquidistant: + if time_axis.TimeAxisType in ( + TimeAxisType.CalendarEquidistant, + TimeAxisType.TimeEquidistant, + ): time_step_file = time_axis.TimeStep freq = pd.Timedelta(seconds=time_step_file) - time = pd.date_range(start_time_file, periods=n_steps_file, freq=freq) - elif time_axis.TimeAxisType == TimeAxisType.CalendarNonEquidistant: - idx = list(range(n_steps_file)) + time = pd.date_range(start_time_file, periods=nt, freq=freq) + elif time_axis.TimeAxisType == TimeAxisType.CalendarNonEquidistant: + idx = list(range(nt)) if isinstance(time_steps, int): return True, [idx[time_steps]] @@ -175,43 +155,10 @@ def _valid_timesteps(dfsFileInfo: DfsFileInfo, time_steps) -> Tuple[bool, List[i return single_time_selected, idx - # if isinstance(time_steps, slice): - # if isinstance(time_steps.start, int) or isinstance(time_steps.stop, int): - # time_steps = list(range(*time_steps.indices(n_steps_file))) - # else: - # s = time.slice_indexer(time_steps.start, time_steps.stop) - # time_steps = list(range(s.start, s.stop)) - # elif isinstance(time_steps, Sequence) and isinstance(time_steps[0], int): - # time_steps = np.array(time_steps) - # time_steps[time_steps < 0] = n_steps_file + time_steps[time_steps < 0] - # time_steps = list(time_steps) - - # if max(time_steps) > (n_steps_file - 1): - # raise IndexError(f"Timestep cannot be larger than {n_steps_file}") - # if min(time_steps) < 0: - # raise IndexError(f"Timestep cannot be less than {-n_steps_file}") - # elif isinstance(time_steps, Iterable): - # steps = [] - # for t in time_steps: - # _, step = _valid_timesteps(dfsFileInfo, t) - # steps.append(step[0]) - # single_time_selected = len(steps) == 1 - # time_steps = steps - - # elif isinstance(time_steps, (pd.Timestamp, datetime)): - # s = time.slice_indexer(time_steps, time_steps) - # time_steps = list(range(s.start, s.stop)) - # #elif isinstance(time_steps, pd.DatetimeIndex): - # # time_steps = list(time.get_indexer(time_steps)) - - # else: - # raise TypeError(f"Indexing is not possible with {type(time_steps)}") - # if len(time_steps) == 1: - # single_time_selected = True - # return single_time_selected, time_steps - - -def _item_numbers_by_name(dfsItemInfo, item_names: List[str], ignore_first: bool=False) -> List[int]: + +def _item_numbers_by_name( + dfsItemInfo, item_names: List[str], ignore_first: bool = False +) -> List[int]: """Utility function to find item numbers Parameters @@ -267,7 +214,9 @@ def _get_item_info( item_numbers = list(range(len(dfsItemInfo) - first_idx)) item_numbers = [i + first_idx for i in item_numbers] - items = [ItemInfo.from_mikecore_dynamic_item_info(dfsItemInfo[i]) for i in item_numbers] + items = [ + ItemInfo.from_mikecore_dynamic_item_info(dfsItemInfo[i]) for i in item_numbers + ] return ItemInfoList(items) @@ -411,7 +360,9 @@ def _read_header(self): }: self._start_time = dfs.FileInfo.TimeAxis.StartDateTime else: # relative time axis - self._start_time = datetime(1970, 1, 1) # TODO is this the proper epoch, should this magic number be somewhere else? + self._start_time = datetime( + 1970, 1, 1 + ) # TODO is this the proper epoch, should this magic number be somewhere else? if hasattr(dfs.FileInfo.TimeAxis, "TimeStep"): self._timestep_in_seconds = ( dfs.FileInfo.TimeAxis.TimeStep @@ -526,7 +477,9 @@ def append(self, data: Dataset) -> None: darray = d.reshape(d.size, 1)[:, 0] if self._ndim == 3: - raise NotImplementedError("Append is not yet available for 3D files") + raise NotImplementedError( + "Append is not yet available for 3D files" + ) if self._is_equidistant: self._dfs.WriteItemTimeStepNext(0, darray.astype(np.float32)) @@ -741,7 +694,9 @@ def time(self) -> pd.DatetimeIndex: # this will fail if the TimeAxisType is not calendar and equidistant, but that is ok if not self._is_equidistant: raise NotImplementedError("Not implemented for non-equidistant files") - return pd.date_range(start=self.start_time, periods=self.n_timesteps, freq=f"{self.timestep}S") + return pd.date_range( + start=self.start_time, periods=self.n_timesteps, freq=f"{self.timestep}S" + ) @property def projection_string(self): diff --git a/tests/test_time.py b/tests/test_time.py new file mode 100644 index 000000000..43bef18cf --- /dev/null +++ b/tests/test_time.py @@ -0,0 +1,20 @@ +import pandas as pd + +from mikeio._time import DateTimeSelector + +def test_date_time_selector(): + + idx = pd.date_range("2000-01-01", periods=4, freq="D") + assert len(idx) == 4 + + dts = DateTimeSelector(idx) + + assert dts.isel(None) == [0,1,2,3] + assert dts.isel(0) == [0] + assert dts.isel(-1) == [3] + assert dts.isel([0,1]) == [0,1] + assert dts.isel("2000-01-02") == [1] + assert dts.isel(["2000-01-02", "2000-01-03"]) == [1,2] + assert dts.isel(idx) == [0,1,2,3] + assert dts.isel(slice(1,4)) == [1,2,3] + assert dts.isel(slice("2000-01-02", "2000-01-04")) == [1,2,3] \ No newline at end of file From ca869a32799343486f08acb3fbc8b29288f5775f Mon Sep 17 00:00:00 2001 From: Henrik Andersson Date: Wed, 23 Aug 2023 10:46:14 +0200 Subject: [PATCH 4/6] Test out of range --- mikeio/_time.py | 37 ++++++++++++++++++++----------------- tests/test_time.py | 26 ++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 19 deletions(-) diff --git a/mikeio/_time.py b/mikeio/_time.py index 33695bbe3..2d2714e21 100644 --- a/mikeio/_time.py +++ b/mikeio/_time.py @@ -7,7 +7,7 @@ @dataclass -class DateTimeSelector: +class DateTimeSelector: """Helper class for selecting time steps from a pandas DatetimeIndex""" index: pd.DatetimeIndex @@ -15,7 +15,12 @@ class DateTimeSelector: def __len__(self): return len(self.index) - def isel(self, x: Optional[int | Iterable[int] | str | datetime | pd.DatetimeIndex | slice]) -> List[int]: + def isel( + self, + x: Optional[ + int | Iterable[int] | str | datetime | pd.DatetimeIndex | slice + ] = None, + ) -> List[int]: """Select time steps from a pandas DatetimeIndex Parameters @@ -38,15 +43,15 @@ def isel(self, x: Optional[int | Iterable[int] | str | datetime | pd.DatetimeInd >>> dts.isel(-1) [3] """ - + indices = list(range(len(self.index))) if x is None: return indices - + if isinstance(x, int): return [indices[x]] - + if isinstance(x, (datetime, str)): loc = self.index.get_loc(x) if isinstance(loc, int): @@ -60,28 +65,26 @@ def isel(self, x: Optional[int | Iterable[int] | str | datetime | pd.DatetimeInd else: s = self.index.slice_indexer(x.start, x.stop) return list(range(s.start, s.stop)) - + if isinstance(x, Iterable): return [self.isel(t)[0] for t in x] - + return indices if __name__ == "__main__": - + idx = pd.date_range("2000-01-01", periods=4, freq="D") assert len(idx) == 4 - + dts = DateTimeSelector(idx) - assert dts.isel(None) == [0,1,2,3] + assert dts.isel(None) == [0, 1, 2, 3] assert dts.isel(0) == [0] assert dts.isel(-1) == [3] - assert dts.isel([0,1]) == [0,1] + assert dts.isel([0, 1]) == [0, 1] assert dts.isel("2000-01-02") == [1] - assert dts.isel(["2000-01-02", "2000-01-03"]) == [1,2] - assert dts.isel(idx) == [0,1,2,3] - assert dts.isel(slice(1,4)) == [1,2,3] - assert dts.isel(slice("2000-01-02", "2000-01-04")) == [1,2,3] - - \ No newline at end of file + assert dts.isel(["2000-01-02", "2000-01-03"]) == [1, 2] + assert dts.isel(idx) == [0, 1, 2, 3] + assert dts.isel(slice(1, 4)) == [1, 2, 3] + assert dts.isel(slice("2000-01-02", "2000-01-04")) == [1, 2, 3] diff --git a/tests/test_time.py b/tests/test_time.py index 43bef18cf..bcd177479 100644 --- a/tests/test_time.py +++ b/tests/test_time.py @@ -1,14 +1,17 @@ import pandas as pd +import pytest +from datetime import datetime from mikeio._time import DateTimeSelector -def test_date_time_selector(): +def test_date_time_selector_valid(): idx = pd.date_range("2000-01-01", periods=4, freq="D") assert len(idx) == 4 dts = DateTimeSelector(idx) + assert dts.isel() == [0,1,2,3] assert dts.isel(None) == [0,1,2,3] assert dts.isel(0) == [0] assert dts.isel(-1) == [3] @@ -17,4 +20,23 @@ def test_date_time_selector(): assert dts.isel(["2000-01-02", "2000-01-03"]) == [1,2] assert dts.isel(idx) == [0,1,2,3] assert dts.isel(slice(1,4)) == [1,2,3] - assert dts.isel(slice("2000-01-02", "2000-01-04")) == [1,2,3] \ No newline at end of file + assert dts.isel(slice("2000-01-02", "2000-01-04")) == [1,2,3] + assert dts.isel(datetime(2000,1,2)) == [1] + +def test_out_of_range_int(): + idx = pd.date_range("2000-01-01", periods=4, freq="D") + dts = DateTimeSelector(idx) + with pytest.raises(IndexError): + dts.isel(4) + +def test_out_of_range_str(): + idx = pd.date_range("2000-01-01", periods=4, freq="D") + dts = DateTimeSelector(idx) + with pytest.raises(KeyError): + dts.isel("2000-01-05") + +def test_out_of_range_datetime(): + idx = pd.date_range("2000-01-01", periods=4, freq="D") + dts = DateTimeSelector(idx) + with pytest.raises(KeyError): + dts.isel(datetime(2000,1,5)) \ No newline at end of file From e37a258da6b3812fc9db4da901ac45d96c7920f9 Mon Sep 17 00:00:00 2001 From: Henrik Andersson Date: Wed, 23 Aug 2023 11:58:38 +0200 Subject: [PATCH 5/6] Remove WIP --- mikeio/_time.py | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/mikeio/_time.py b/mikeio/_time.py index 2d2714e21..d1e65b167 100644 --- a/mikeio/_time.py +++ b/mikeio/_time.py @@ -12,9 +12,6 @@ class DateTimeSelector: index: pd.DatetimeIndex - def __len__(self): - return len(self.index) - def isel( self, x: Optional[ @@ -69,22 +66,4 @@ def isel( if isinstance(x, Iterable): return [self.isel(t)[0] for t in x] - return indices - - -if __name__ == "__main__": - - idx = pd.date_range("2000-01-01", periods=4, freq="D") - assert len(idx) == 4 - - dts = DateTimeSelector(idx) - - assert dts.isel(None) == [0, 1, 2, 3] - assert dts.isel(0) == [0] - assert dts.isel(-1) == [3] - assert dts.isel([0, 1]) == [0, 1] - assert dts.isel("2000-01-02") == [1] - assert dts.isel(["2000-01-02", "2000-01-03"]) == [1, 2] - assert dts.isel(idx) == [0, 1, 2, 3] - assert dts.isel(slice(1, 4)) == [1, 2, 3] - assert dts.isel(slice("2000-01-02", "2000-01-04")) == [1, 2, 3] + return indices \ No newline at end of file From 130c48e4366591ab16cf4832a5efe7461ac06b41 Mon Sep 17 00:00:00 2001 From: Henrik Andersson Date: Wed, 23 Aug 2023 11:59:09 +0200 Subject: [PATCH 6/6] Use selector for indexing in DataArrays --- mikeio/dataset/_data_utils.py | 47 ++++++++--------------------------- tests/test_dataarray.py | 2 +- tests/test_dfsu_layered.py | 2 +- 3 files changed, 13 insertions(+), 38 deletions(-) diff --git a/mikeio/dataset/_data_utils.py b/mikeio/dataset/_data_utils.py index 49047743d..01765cab6 100644 --- a/mikeio/dataset/_data_utils.py +++ b/mikeio/dataset/_data_utils.py @@ -1,11 +1,12 @@ from __future__ import annotations import re -from datetime import datetime -from typing import Iterable, Sequence, Sized, Tuple +from typing import Iterable, Sequence, Sized, Tuple, Union, List import numpy as np import pandas as pd +from .._time import DateTimeSelector + def _to_safe_name(name: str) -> str: tmp = re.sub("[^0-9a-zA-Z]", "_", name) @@ -18,45 +19,19 @@ def _n_selected_timesteps(x: Sized, k: slice | Sized) -> int: return len(k) -def _get_time_idx_list(time: pd.DatetimeIndex, steps): +def _get_time_idx_list(time: pd.DatetimeIndex, steps) -> Union [List[int], slice]: """Find list of idx in DatetimeIndex""" - if isinstance(steps, str): - parts = steps.split(",") - if len(parts) == 1: - parts.append(parts[0]) # end=start - - if parts[0] == "": - steps = slice(parts[1]) # stop only - elif parts[1] == "": - steps = slice(parts[0], None) # start only - else: - steps = slice(parts[0], parts[1]) + # indexing with a slice needs to be handled differently, since slicing returns a view - if isinstance(steps, (list, tuple)) and isinstance( - steps[0], (str, datetime, np.datetime64, pd.Timestamp) - ): - steps = pd.DatetimeIndex(steps) - if isinstance(steps, pd.DatetimeIndex): - return time.get_indexer(steps) - if isinstance(steps, (str, datetime, np.datetime64, pd.Timestamp)): - steps = slice(steps, steps) if isinstance(steps, slice): - try: - s = time.slice_indexer( - steps.start, - steps.stop, - ) - steps = list(range(s.start, s.stop)) - except TypeError: - pass # TODO this seems fishy! - # steps = list(range(*steps.indices(len(time)))) - elif isinstance(steps, int): - steps = [steps] - # TODO what is the return type of this function - return steps + if isinstance(steps.start, int) and isinstance(steps.stop, int): + return steps + dts = DateTimeSelector(time) + return dts.isel(steps) +# TODO this only used by DataArray, so consider to move it there class DataUtilsMixin: """DataArray Utils""" @@ -107,7 +82,7 @@ def _set_by_boolean_mask(data: np.ndarray, mask: np.ndarray, value) -> None: def _parse_time(time) -> pd.DatetimeIndex: """Allow anything that we can create a DatetimeIndex from""" if time is None: - time = [pd.Timestamp(2018, 1, 1)] + time = [pd.Timestamp(2018, 1, 1)] # TODO is this the correct epoch? if isinstance(time, str) or (not isinstance(time, Iterable)): time = [time] diff --git a/tests/test_dataarray.py b/tests/test_dataarray.py index 8f8803560..49451561a 100644 --- a/tests/test_dataarray.py +++ b/tests/test_dataarray.py @@ -1292,7 +1292,7 @@ def test_time_selection(): assert das_t.shape == (24,) - with pytest.raises(IndexError): + with pytest.raises(KeyError): # not in time ds.sel(time="1997-09-15 00:00") diff --git a/tests/test_dfsu_layered.py b/tests/test_dfsu_layered.py index ee7db254a..2952086c7 100644 --- a/tests/test_dfsu_layered.py +++ b/tests/test_dfsu_layered.py @@ -297,7 +297,7 @@ def test_read_column_interp_time_and_select_time(): salinity_st = da.sel(time="1997-09-15 23:00") # single time-step assert salinity_st.n_timesteps == 1 - with pytest.raises(IndexError): + with pytest.raises(KeyError): # not in time da.sel(time="1997-09-15 00:00")