From 9815f27b1688b315de04e765fdb8c3ea6311d83e Mon Sep 17 00:00:00 2001 From: Henrik Andersson Date: Thu, 29 Feb 2024 19:52:01 +0100 Subject: [PATCH 1/8] Remove Dfs2.write --- mikeio/dfs/_dfs.py | 8 ++- mikeio/dfs/_dfs2.py | 155 ++++++++++++++++++++++---------------------- tests/test_dfs2.py | 72 ++------------------ 3 files changed, 85 insertions(+), 150 deletions(-) diff --git a/mikeio/dfs/_dfs.py b/mikeio/dfs/_dfs.py index 873ed0d58..6b0bf020e 100644 --- a/mikeio/dfs/_dfs.py +++ b/mikeio/dfs/_dfs.py @@ -286,7 +286,7 @@ def __init__(self, filename=None): self._override_coordinates = False self._timeseries_unit = TimeStepUnit.SECOND self._dt = None - self.geometry = GeometryUndefined() + self._geometry = GeometryUndefined() self._dfs = None self._source = None @@ -368,8 +368,10 @@ def read( self._dfs.Close() return Dataset(data_list, time, items, geometry=self.geometry, validate=False) - def _read_header(self): - dfs = self._dfs + def _read_header(self, filename: str) -> None: + dfs = DfsFileFactory.DfsGenericOpen(filename) + # TODO remove _dfs attribute + self._dfs = dfs self._n_items = len(dfs.ItemInfo) self._items = self._get_item_info(list(range(self._n_items))) self._timeaxistype = dfs.FileInfo.TimeAxis.TimeAxisType diff --git a/mikeio/dfs/_dfs2.py b/mikeio/dfs/_dfs2.py index 278556add..6772b3e61 100644 --- a/mikeio/dfs/_dfs2.py +++ b/mikeio/dfs/_dfs2.py @@ -2,17 +2,17 @@ from copy import deepcopy from pathlib import Path from typing import List, Tuple -import warnings +from collections.abc import Sequence import numpy as np import pandas as pd from tqdm import tqdm -from mikecore.DfsFactory import DfsBuilder, DfsFactory # type: ignore -from mikecore.DfsFile import DfsFile, DfsSimpleType # type: ignore -from mikecore.DfsFileFactory import DfsFileFactory # type: ignore -from mikecore.eum import eumQuantity, eumUnit # type: ignore -from mikecore.Projections import Cartography # type: ignore +from mikecore.DfsFactory import DfsBuilder, DfsFactory +from mikecore.DfsFile import DfsFile, DfsSimpleType +from mikecore.DfsFileFactory import DfsFileFactory +from mikecore.eum import eumQuantity, eumUnit +from mikecore.Projections import Cartography from .. import __dfs_version__ from ..dataset import Dataset @@ -27,12 +27,12 @@ from ..spatial import Grid2D -def write_dfs2(filename: str | Path, ds: Dataset, title="") -> None: +def write_dfs2(filename: str | Path, ds: Dataset, title: str = "") -> None: dfs = _write_dfs2_header(filename, ds, title) _write_dfs_data(dfs=dfs, ds=ds, n_spatial_dims=2) -def _write_dfs2_header(filename: str | Path, ds: Dataset, title="") -> DfsFile: +def _write_dfs2_header(filename: str | Path, ds: Dataset, title: str = "") -> DfsFile: builder = DfsBuilder.Create(title, "mikeio", __dfs_version__) builder.SetDataType(0) @@ -108,36 +108,35 @@ def _write_dfs2_spatial_axis(builder, factory, geometry): class Dfs2(_Dfs123): _ndim = 2 - def __init__(self, filename=None, type: str = "horizontal"): + def __init__(self, filename: str | Path, type: str = "horizontal"): + filename = str(filename) super().__init__(filename) - # TODO find a better way to avoid initializing these non-sensical values - self._dx = 0.0 - self._dy = 0.0 - self._nx = 0 - self._ny = 0 + # TODO move to base class + self._read_header(filename) + + # TODO self._x0 = 0.0 self._y0 = 0.0 - self.geometry = None - - if filename: - is_spectral = type.lower() in ["spectral", "spectra", "spectrum"] - self._read_dfs2_header(filename=filename, read_x0y0=is_spectral) - self._validate_no_orientation_in_geo() - origin, orientation = self._origin_and_orientation_in_CRS() - - self.geometry = Grid2D( - dx=self._dx, - dy=self._dy, - nx=self._nx, - ny=self._ny, - x0=self._x0, - y0=self._y0, - orientation=orientation, - origin=origin, - projection=self._projstr, - is_spectral=is_spectral, - ) + + is_spectral = type.lower() in ["spectral", "spectra", "spectrum"] + # self._read_dfs2_header(filename=filename, read_x0y0=is_spectral) + self._geometry = self._read_geometry(filename=filename, is_spectral=is_spectral) + self._validate_no_orientation_in_geo() + # origin, orientation = self._origin_and_orientation_in_CRS() + + # self.geometry = Grid2D( + # dx=self._dx, + # dy=self._dy, + # nx=self._nx, + # ny=self._ny, + # x0=self._x0, + # y0=self._y0, + # orientation=orientation, + # origin=origin, + # projection=self._projstr, + # is_spectral=is_spectral, + # ) def __repr__(self): out = [""] @@ -162,7 +161,30 @@ def __repr__(self): return str.join("\n", out) - def _read_dfs2_header(self, filename, read_x0y0: bool = False): + def _read_geometry(self, filename: str | Path, is_spectral: bool = False) -> Grid2D: + dfs = DfsFileFactory.Dfs2FileOpen(str(filename)) + + x0 = dfs.SpatialAxis.X0 if is_spectral else 0.0 + y0 = dfs.SpatialAxis.Y0 if is_spectral else 0.0 + + origin, orientation = self._origin_and_orientation_in_CRS() + + geometry = Grid2D( + dx=dfs.SpatialAxis.Dx, + dy=dfs.SpatialAxis.Dy, + nx=dfs.SpatialAxis.XCount, + ny=dfs.SpatialAxis.YCount, + x0=x0, + y0=y0, + projection=self._projstr, + orientation=orientation, + origin=origin, + is_spectral=is_spectral, + ) + dfs.Close() + return geometry + + def _read_dfs2_header(self, filename: str | Path, read_x0y0: bool = False) -> None: self._dfs = DfsFileFactory.Dfs2FileOpen(str(filename)) self._source = self._dfs if read_x0y0: @@ -180,11 +202,11 @@ def _read_dfs2_header(self, filename, read_x0y0: bool = False): def read( self, *, - items=None, - time=None, - area=None, - keepdims=False, - dtype=np.float32, + items: str | int | Sequence[str | int] | None = None, + time: int | str | slice | None = None, + area: Tuple[float, float, float, float] | None = None, + keepdims: bool = False, + dtype: Any = np.float32, ) -> Dataset: """ Read data from a dfs2 file @@ -226,7 +248,7 @@ def read( geometry = self.geometry._index_to_Grid2D(ii, jj) else: take_subset = False - shape = (nt, self._ny, self._nx) + shape = (nt, self.ny, self.nx) geometry = self.geometry if single_time_selected and not keepdims: @@ -244,7 +266,7 @@ def read( d = itemdata.Data d[d == self.deletevalue] = np.nan - d = d.reshape(self._ny, self._nx) + d = d.reshape(self.ny, self.nx) if take_subset: d = np.take(np.take(d, jj, axis=0), ii, axis=-1) @@ -280,36 +302,6 @@ def _open(self): self._dfs = DfsFileFactory.Dfs2FileOpen(self._filename) self._source = self._dfs - def write( - self, - filename: str, - data: Dataset, - dt: float | None = None, - title: str | None = None, - keep_open: bool = False, - ): - # this method is deprecated - warnings.warn( - FutureWarning("Dfs2.write() is deprecated, use Dataset.to_dfs() instead") - ) - - filename = str(filename) - - self._builder = DfsBuilder.Create(title, "mikeio", __dfs_version__) - self._dx = data.geometry.dx - self._dy = data.geometry.dy - - self._write( - filename=filename, - ds=data, - dt=dt, - title=title, - keep_open=keep_open, - ) - - if keep_open: - return self - def _set_spatial_axis(self): self._builder.SetSpatialAxis( self._factory.CreateAxisEqD2( @@ -323,40 +315,45 @@ def _set_spatial_axis(self): ) ) + @property + def geometry(self) -> Grid2D: + """Spatial information""" + return self._geometry + @property def x0(self): """Start point of x values (often 0)""" - return self._x0 + return self.geometry.x[0] @property def y0(self): """Start point of y values (often 0)""" - return self._y0 + return self.geometry.y[0] @property def dx(self): """Step size in x direction""" - return self._dx + return self.geometry.dx @property def dy(self): """Step size in y direction""" - return self._dy + return self.geometry.dy @property def shape(self) -> Tuple[int, ...]: """Tuple with number of values in the t-, y-, x-direction""" - return (self._n_timesteps, self._ny, self._nx) + return (self._n_timesteps, self.geometry.ny, self.geometry.nx) @property def nx(self): """Number of values in the x-direction""" - return self._nx + return self.geometry.nx @property def ny(self): """Number of values in the y-direction""" - return self._ny + return self.geometry.ny @property def is_geo(self): diff --git a/tests/test_dfs2.py b/tests/test_dfs2.py index d2c68f473..e9a89f3d4 100644 --- a/tests/test_dfs2.py +++ b/tests/test_dfs2.py @@ -164,6 +164,7 @@ def test_write_without_time(tmp_path): def test_read(dfs2_random): dfs = dfs2_random + assert isinstance(dfs.geometry, Grid2D) ds = dfs.read(items=["testing water level"]) data = ds[0].to_numpy() assert data[0, 88, 0] == 0 @@ -318,6 +319,9 @@ def test_properties_pt_spectrum(dfs2_pt_spectrum): def test_properties_pt_spectrum_linearf(dfs2_pt_spectrum_linearf): dfs = dfs2_pt_spectrum_linearf + # This file doesn't have a valid projection string + # dfs.FileInfo.Projection.WKTString = '' + assert dfs.x0 == pytest.approx(0.00390625) assert dfs.y0 == 0 assert dfs.dx == pytest.approx(0.00390625) @@ -456,15 +460,6 @@ def test_repr(dfs2_gebco): assert "dx" in text -def test_repr_empty(): - - dfs = Dfs2() - - text = repr(dfs) - - assert "Dfs2" in text - - def test_repr_time(dfs2_random): dfs = dfs2_random @@ -641,65 +636,6 @@ def test_write_non_equidistant_data(tmp_path): assert not ds3.is_equidistant -def test_incremental_write_from_dfs2(tmp_path): - "Useful for writing datasets with many timesteps to avoid problems with out of memory" - - fp = tmp_path / "appended.dfs2" - dfs = mikeio.open("tests/testdata/eq.dfs2") - - nt = dfs.n_timesteps - - ds = dfs.read(time=[0], keepdims=True) - # assert ds.timestep == dfs.timestep, # ds.timestep is undefined - - # TODO find a better way to do this, without having to create a new dfs2 object - dfs_to_write = Dfs2() - - with pytest.warns(FutureWarning): - dfs_to_write.write(fp, ds, dt=dfs.timestep, keep_open=True) - - for i in range(1, nt): - ds = dfs.read(time=[i], keepdims=True) - - with pytest.warns(FutureWarning): - dfs_to_write.append(ds) - - dfs_to_write.close() - - newdfs = mikeio.open(fp) - assert dfs.start_time == newdfs.start_time - assert dfs.timestep == newdfs.timestep - assert dfs.end_time == newdfs.end_time - - -def test_incremental_write_from_dfs2_context_manager(tmp_path): - "Useful for writing datasets with many timesteps to avoid problems with out of memory" - - fp = tmp_path / "appended.dfs2" - dfs = mikeio.open("tests/testdata/eq.dfs2") - - nt = dfs.n_timesteps - - ds = dfs.read(time=[0], keepdims=True) - - dfs_to_write = Dfs2() - - with pytest.warns(FutureWarning): - with dfs_to_write.write(fp, ds, dt=dfs.timestep, keep_open=True) as f: - - for i in range(1, nt): - ds = dfs.read(time=[i], keepdims=True) - with pytest.warns(FutureWarning): - f.append(ds) - - # dfs_to_write.close() # called automagically by context manager - - newdfs = mikeio.open(fp) - assert dfs.start_time == newdfs.start_time - assert dfs.timestep == newdfs.timestep - assert dfs.end_time == newdfs.end_time - - def test_read_concat_write_dfs2(tmp_path): outfilename = tmp_path / "waves_concat.dfs2" From dfafb6f0f51335c0b3923de602090ef1be41c417 Mon Sep 17 00:00:00 2001 From: Henrik Andersson Date: Thu, 29 Feb 2024 20:50:14 +0100 Subject: [PATCH 2/8] Refactor by deletion --- mikeio/dfs/_dfs.py | 281 ++++++-------------------------------------- mikeio/dfs/_dfs1.py | 42 +++---- mikeio/dfs/_dfs2.py | 75 ++---------- mikeio/dfs/_dfs3.py | 199 ++++++++----------------------- tests/test_dfs2.py | 5 +- 5 files changed, 113 insertions(+), 489 deletions(-) diff --git a/mikeio/dfs/_dfs.py b/mikeio/dfs/_dfs.py index 6b0bf020e..cb529f091 100644 --- a/mikeio/dfs/_dfs.py +++ b/mikeio/dfs/_dfs.py @@ -3,26 +3,23 @@ from abc import abstractmethod from dataclasses import dataclass from datetime import datetime, timedelta -from typing import List, Tuple, Sequence +from typing import Any, List, Tuple, Sequence import numpy as np import pandas as pd -from tqdm import tqdm, trange +from tqdm import tqdm -from mikecore.DfsFactory import DfsFactory from mikecore.DfsFile import ( DfsDynamicItemInfo, DfsFile, DfsFileInfo, - DfsSimpleType, TimeAxisType, ) from mikecore.DfsFileFactory import DfsFileFactory -from mikecore.eum import eumQuantity from mikecore.Projections import Cartography from ..dataset import Dataset from ..eum import EUMType, EUMUnit, ItemInfo, ItemInfoList, TimeStepUnit -from ..exceptions import DataDimensionMismatch, ItemsError +from ..exceptions import ItemsError from ..spatial import GeometryUndefined from .._time import DateTimeSelector @@ -290,13 +287,33 @@ def __init__(self, filename=None): self._dfs = None self._source = None + def __repr__(self): + name = self.__class__.__name__ + out = [f""] + + out.append(f"geometry: {self.geometry}") + if self._n_items < 10: + out.append("items:") + for i, item in enumerate(self.items): + out.append(f" {i}: {item}") + else: + out.append(f"number of items: {self._n_items}") + + if self._n_timesteps == 1: + out.append("time: time-invariant file (1 step)") + else: + out.append(f"time: {self._n_timesteps} steps") + out.append(f"start time: {self._start_time}") + + return str.join("\n", out) + def read( self, *, - items=None, - time=None, - keepdims=False, - dtype=np.float32, + items: str | int | Sequence[str | int] | None = None, + time: int | str | slice | None = None, + keepdims: bool = False, + dtype: Any = np.float32, ) -> Dataset: """ Read data from a dfs file @@ -327,11 +344,11 @@ def read( shape: Tuple[int, ...] if self._ndim == 1: - shape = (nt, self._nx) + shape = (nt, self.nx) # type: ignore elif self._ndim == 2: - shape = (nt, self._ny, self._nx) + shape = (nt, self.ny, self.nx) # type: ignore else: - shape = (nt, self._nz, self._ny, self._nx) + shape = (nt, self.nz, self.ny, self.nx) # type: ignore if single_time_selected and not keepdims: shape = shape[1:] @@ -352,7 +369,7 @@ def read( d[d == self.deletevalue] = np.nan if self._ndim == 2: - d = d.reshape(self._ny, self._nx) + d = d.reshape(self.ny, self.nx) # type: ignore if single_time_selected: data_list[item] = d @@ -399,244 +416,12 @@ def _read_header(self, filename: str) -> None: dfs.Close() - def _write( - self, - *, - filename, - ds, - dt, - coordinate=None, - title, - keep_open=False, - ): - assert isinstance(ds, Dataset) - - neq_datetimes = None - if isinstance(ds, Dataset) and not ds.is_equidistant: - neq_datetimes = ds.time - - header, data = self._write_handle_common_arguments( - title=title, data=ds, dt=dt, coordinate=coordinate - ) - - shape = np.shape(data[0]) - t_offset = 0 if len(shape) == self._ndim else 1 - - # TODO find out a clever way to handle the grid dimensions - if self._ndim == 1: - self._nx = shape[t_offset + 0] - elif self._ndim == 2: - self._ny = shape[t_offset + 0] - self._nx = shape[t_offset + 1] - elif self._ndim == 3: - self._nz = shape[t_offset + 0] - self._ny = shape[t_offset + 1] - self._nx = shape[t_offset + 2] - - self._factory = DfsFactory() - - # TODO pass grid - self._set_spatial_axis() - - if self._ndim == 1: - if not all(np.shape(d)[t_offset + 0] == self._nx for d in data): - raise DataDimensionMismatch() - - if self._ndim == 2: - if not all(np.shape(d)[t_offset + 0] == self._ny for d in data): - raise DataDimensionMismatch() - - if not all(np.shape(d)[t_offset + 1] == self._nx for d in data): - raise DataDimensionMismatch() - - if neq_datetimes is not None: - self._is_equidistant = False - start_time = neq_datetimes[0] - self._start_time = start_time - - dfs = self._setup_header(filename, header) - self._dfs = dfs - - deletevalue = dfs.FileInfo.DeleteValueFloat # -1.0000000031710769e-30 - - for i in trange(header.n_timesteps, disable=not self.show_progress): - for item in range(header.n_items): - d = data[item][i] if t_offset == 1 else data[item] - d = d.copy() # to avoid modifying the input - d[np.isnan(d)] = deletevalue - - if self._is_equidistant: - dfs.WriteItemTimeStepNext(0, d.astype(np.float32)) - else: - t = neq_datetimes[i] - relt = (t - self._start_time).total_seconds() - dfs.WriteItemTimeStepNext(relt, d.astype(np.float32)) - - if not keep_open: - dfs.Close() - else: - return self - - def append(self, data: Dataset) -> None: - warnings.warn(FutureWarning("append() is deprecated.")) - - if not data.dims == ("time", "y", "x"): - raise NotImplementedError( - "Append is only available for 2D files with dims ('time', 'y', 'x')" - ) - - deletevalue = self._dfs.FileInfo.DeleteValueFloat # -1.0000000031710769e-30 - - for i in trange(data.n_timesteps, disable=not self.show_progress): - for da in data: - values = da.to_numpy() - d = values[i] - d = d.copy() # to avoid modifying the input - d[np.isnan(d)] = deletevalue - - d = d.reshape(data.shape[1:]) - darray = d.reshape(d.size, 1)[:, 0] - self._dfs.WriteItemTimeStepNext(0, darray.astype(np.float32)) - - def __enter__(self): - return self - - def __exit__(self, type, value, traceback): - self._dfs.Close() - - def close(self): - "Finalize write for a dfs file opened with `write(...,keep_open=True)`" - self._dfs.Close() - - def _write_handle_common_arguments( - self, *, title: str | None, data: Dataset, coordinate, dt: float | None = None - ): - if title is None: - self._title = "" - - n_timesteps = data.n_timesteps - n_items = data.n_items - - if coordinate is None: - if self._projstr is not None: - coordinate = [ - self._projstr, - self._longitude, - self._latitude, - self._orientation, - ] - elif isinstance(data, Dataset) and (data.geometry is not None): - coordinate = [ - data.geometry.projection_string, - data.geometry.origin[0], - data.geometry.origin[1], - data.geometry.orientation, - ] - else: - warnings.warn("No coordinate system provided") - coordinate = ["LONG/LAT", 0, 0, 0] - else: - self._override_coordinates = True - - assert isinstance( - data, Dataset - ), "data must be supplied in the form of a mikeio.Dataset" - - items = data.items - start_time = data.time[0] - n_timesteps = len(data.time) - if dt is None and len(data.time) > 1: - dt = (data.time[1] - data.time[0]).total_seconds() - data = data.to_numpy() - - if dt is None: - dt = 1 - if n_timesteps > 1: - warnings.warn("No timestep supplied. Using 1s.") - - header = DfsHeader( - n_items=n_items, - n_timesteps=n_timesteps, - dt=dt, - start_time=start_time, - coordinates=coordinate, - items=items, - ) - return header, data - - def _setup_header(self, filename: str, header: DfsHeader): - system_start_time = header.start_time - - self._builder.SetDataType(0) - - proj = self._factory.CreateProjectionGeoOrigin(*header.coordinates) - - self._builder.SetGeographicalProjection(proj) - - if self._is_equidistant: - self._builder.SetTemporalAxis( - self._factory.CreateTemporalEqCalendarAxis( - self._timeseries_unit, system_start_time, 0, header.dt - ) - ) - else: - self._builder.SetTemporalAxis( - self._factory.CreateTemporalNonEqCalendarAxis( - self._timeseries_unit, system_start_time - ) - ) - - for item in header.items: - self._builder.AddCreateDynamicItem( - item.name, - eumQuantity.Create(item.type, item.unit), - DfsSimpleType.Float, - item.data_value_type, - ) - - try: - self._builder.CreateFile(filename) - except IOError: - # TODO does this make sense? - print("cannot create dfs file: ", filename) - - return self._builder.GetFile() - def _open(self): raise NotImplementedError("Should be implemented by subclass") def _set_spatial_axis(self): raise NotImplementedError("Should be implemented by subclass") - def _find_item(self, item_names): - """Utility function to find item numbers - - Parameters - ---------- - dfs : DfsFile - - item_names : list[str] - Names of items to be found - - Returns - ------- - list[int] - item numbers (0-based) - - Raises - ------ - KeyError - In case item is not found in the dfs file - """ - names = [x.Name for x in self._dfs.ItemInfo] - item_lookup = {name: i for i, name in enumerate(names)} - try: - item_numbers = [item_lookup[x] for x in item_names] - except KeyError: - raise KeyError(f"Selected item name not found. Valid names are {names}") - - return item_numbers - def _get_item_info(self, item_numbers): """Read DFS ItemInfo @@ -661,6 +446,10 @@ def _get_item_info(self, item_numbers): items.append(item) return items + @property + def geometry(self) -> Any: + return self._geometry + @property def deletevalue(self): "File delete value" diff --git a/mikeio/dfs/_dfs1.py b/mikeio/dfs/_dfs1.py index 45560f97d..370697828 100644 --- a/mikeio/dfs/_dfs1.py +++ b/mikeio/dfs/_dfs1.py @@ -28,7 +28,12 @@ def _write_dfs1_header(filename: str | Path, ds: Dataset, title="") -> DfsFile: geometry: Grid1D = ds.geometry factory = DfsFactory() - proj = factory.CreateProjectionGeoOrigin(ds.geometry.projection_string,ds.geometry.origin[0],ds.geometry.origin[1],ds.geometry.orientation) + proj = factory.CreateProjectionGeoOrigin( + ds.geometry.projection_string, + ds.geometry.origin[0], + ds.geometry.origin[1], + ds.geometry.orientation, + ) builder.SetGeographicalProjection(proj) builder.SetSpatialAxis( factory.CreateAxisEqD1( @@ -41,9 +46,7 @@ def _write_dfs1_header(filename: str | Path, ds: Dataset, title="") -> DfsFile: timestep_unit = TimeStepUnit.SECOND dt = ds.timestep or 1.0 # It can not be None - time_axis = factory.CreateTemporalEqCalendarAxis( - timestep_unit, ds.time[0], 0, dt - ) + time_axis = factory.CreateTemporalEqCalendarAxis(timestep_unit, ds.time[0], 0, dt) builder.SetTemporalAxis(time_axis) for item in ds.items: @@ -60,7 +63,7 @@ def _write_dfs1_header(filename: str | Path, ds: Dataset, title="") -> DfsFile: print("cannot create dfs file: ", filename) return builder.GetFile() - + class Dfs1(_Dfs123): _ndim = 1 @@ -76,10 +79,10 @@ def __init__(self, filename): self._dx = self._dfs.SpatialAxis.Dx self._nx = self._dfs.SpatialAxis.XCount - self._read_header() + self._read_header(str(filename)) origin = self._longitude, self._latitude - self.geometry = Grid1D( + self._geometry = Grid1D( x0=self._x0, dx=self._dx, nx=self._nx, @@ -88,27 +91,6 @@ def __init__(self, filename): orientation=self._orientation, ) - def __repr__(self): - out = [""] - - out.append(f"dx: {self.dx:.5f}") - - if self._n_items < 10: - out.append("items:") - for i, item in enumerate(self.items): - out.append(f" {i}: {item}") - else: - out.append(f"number of items: {self._n_items}") - - if self._n_timesteps == 1: - out.append("time: time-invariant file (1 step)") - else: - out.append(f"time: {self._n_timesteps} steps") - out.append(f"start time: {self._start_time}") - - return str.join("\n", out) - - def _open(self): self._dfs = DfsFileFactory.Dfs1FileOpen(self._filename) @@ -119,6 +101,10 @@ def _set_spatial_axis(self): ) ) + @property + def geometry(self): + return self._geometry + @property def x0(self): """Start point of x values (often 0)""" diff --git a/mikeio/dfs/_dfs2.py b/mikeio/dfs/_dfs2.py index 6772b3e61..8b04cc7ed 100644 --- a/mikeio/dfs/_dfs2.py +++ b/mikeio/dfs/_dfs2.py @@ -1,7 +1,7 @@ from __future__ import annotations from copy import deepcopy from pathlib import Path -from typing import List, Tuple +from typing import Any, List, Literal, Tuple from collections.abc import Sequence import numpy as np @@ -108,7 +108,11 @@ def _write_dfs2_spatial_axis(builder, factory, geometry): class Dfs2(_Dfs123): _ndim = 2 - def __init__(self, filename: str | Path, type: str = "horizontal"): + def __init__( + self, + filename: str | Path, + type: Literal["horizontal", "spectral"] = "horizontal", + ): filename = str(filename) super().__init__(filename) @@ -116,52 +120,10 @@ def __init__(self, filename: str | Path, type: str = "horizontal"): self._read_header(filename) # TODO - self._x0 = 0.0 - self._y0 = 0.0 - - is_spectral = type.lower() in ["spectral", "spectra", "spectrum"] - # self._read_dfs2_header(filename=filename, read_x0y0=is_spectral) - self._geometry = self._read_geometry(filename=filename, is_spectral=is_spectral) - self._validate_no_orientation_in_geo() - # origin, orientation = self._origin_and_orientation_in_CRS() - - # self.geometry = Grid2D( - # dx=self._dx, - # dy=self._dy, - # nx=self._nx, - # ny=self._ny, - # x0=self._x0, - # y0=self._y0, - # orientation=orientation, - # origin=origin, - # projection=self._projstr, - # is_spectral=is_spectral, - # ) - - def __repr__(self): - out = [""] - - if self._filename: - out.append(f"dx: {self.dx:.5f}") - out.append(f"dy: {self.dy:.5f}") - - if self._n_items is not None: - if self._n_items < 10: - out.append("items:") - for i, item in enumerate(self.items): - out.append(f" {i}: {item}") - else: - out.append(f"number of items: {self._n_items}") + # self._x0 = 0.0 + # self._y0 = 0.0 - if self._n_timesteps == 1: - out.append("time: time-invariant file (1 step)") - else: - out.append(f"time: {self._n_timesteps} steps") - out.append(f"start time: {self._start_time}") - - return str.join("\n", out) - - def _read_geometry(self, filename: str | Path, is_spectral: bool = False) -> Grid2D: + is_spectral = type == "spectral" dfs = DfsFileFactory.Dfs2FileOpen(str(filename)) x0 = dfs.SpatialAxis.X0 if is_spectral else 0.0 @@ -169,7 +131,7 @@ def _read_geometry(self, filename: str | Path, is_spectral: bool = False) -> Gri origin, orientation = self._origin_and_orientation_in_CRS() - geometry = Grid2D( + self._geometry = Grid2D( dx=dfs.SpatialAxis.Dx, dy=dfs.SpatialAxis.Dy, nx=dfs.SpatialAxis.XCount, @@ -182,22 +144,7 @@ def _read_geometry(self, filename: str | Path, is_spectral: bool = False) -> Gri is_spectral=is_spectral, ) dfs.Close() - return geometry - - def _read_dfs2_header(self, filename: str | Path, read_x0y0: bool = False) -> None: - self._dfs = DfsFileFactory.Dfs2FileOpen(str(filename)) - self._source = self._dfs - if read_x0y0: - self._x0 = self._dfs.SpatialAxis.X0 - self._y0 = self._dfs.SpatialAxis.Y0 - self._dx = self._dfs.SpatialAxis.Dx - self._dy = self._dfs.SpatialAxis.Dy - self._nx = self._dfs.SpatialAxis.XCount - self._ny = self._dfs.SpatialAxis.YCount - if self._dfs.FileInfo.TimeAxis.TimeAxisType == 4: - self._is_equidistant = False - - self._read_header() + self._validate_no_orientation_in_geo() def read( self, diff --git a/mikeio/dfs/_dfs3.py b/mikeio/dfs/_dfs3.py index 7bdde1a02..80752ba9e 100644 --- a/mikeio/dfs/_dfs3.py +++ b/mikeio/dfs/_dfs3.py @@ -1,6 +1,7 @@ from __future__ import annotations from pathlib import Path -from typing import Tuple +from collections.abc import Sequence +from typing import Any, Tuple import numpy as np import pandas as pd @@ -25,19 +26,32 @@ from ..spatial import Grid3D -def write_dfs3(filename: str | Path, ds: Dataset, title="") -> None: +def write_dfs3(filename: str | Path, ds: Dataset, title: str = "") -> None: dfs = _write_dfs3_header(filename, ds, title) _write_dfs_data(dfs=dfs, ds=ds, n_spatial_dims=3) -def _write_dfs3_header(filename: str | Path, ds: Dataset, title="") -> DfsFile: +def _write_dfs3_header(filename: str | Path, ds: Dataset, title: str) -> DfsFile: builder = DfsBuilder.Create(title, "mikeio", __dfs_version__) builder.SetDataType(0) geometry: Grid3D = ds.geometry factory = DfsFactory() - _write_dfs3_spatial_axis(builder, factory, geometry) + builder.SetSpatialAxis( + factory.CreateAxisEqD3( + eumUnit.eumUmeter, + geometry.nx, + geometry.x[0], + geometry.dx, + geometry.ny, + geometry.y[0], + geometry.dy, + geometry.nz, + geometry.z[0], + geometry.dz, + ) + ) origin = geometry.origin # Origin in geographical coordinates orient = geometry.orientation @@ -84,84 +98,38 @@ def _write_dfs3_header(filename: str | Path, ds: Dataset, title="") -> DfsFile: return builder.GetFile() -def _write_dfs3_spatial_axis(builder, factory, geometry: Grid3D): - builder.SetSpatialAxis( - factory.CreateAxisEqD3( - eumUnit.eumUmeter, - geometry.nx, - geometry.x[0], - geometry.dx, - geometry.ny, - geometry.y[0], - geometry.dy, - geometry.nz, - geometry.z[0], - geometry.dz, - ) - ) - - class Dfs3(_Dfs123): _ndim = 3 - def __init__(self, filename=None): - super().__init__(filename) + def __init__(self, filename: str | Path): + super().__init__(str(filename)) - self._dx = None - self._dy = None - self._dz = None - self._nx = None - self._ny = None - self._nz = None + # TODO self._x0 = 0.0 self._y0 = 0.0 self._z0 = 0.0 - self.geometry = None - - if filename: - self._read_dfs3_header() - self._validate_no_orientation_in_geo() - origin, orientation = self._origin_and_orientation_in_CRS() - - self.geometry = Grid3D( - x0=self._x0, - dx=self._dx, - nx=self._nx, - y0=self._y0, - dy=self._dy, - ny=self._ny, - z0=self._z0, - dz=self._dz, - nz=self._nz, - origin=origin, - projection=self._projstr, - orientation=orientation, - ) - - def __repr__(self): - out = [""] - if self._filename: - out.append(f"geometry: {self.geometry}") - - if self._n_items is not None: - if self._n_items < 10: - out.append("items:") - for i, item in enumerate(self.items): - out.append(f" {i}: {item}") - else: - out.append(f"number of items: {self._n_items}") - - if self._n_timesteps == 1: - out.append("time: time-invariant file (1 step)") - else: - out.append(f"time: {self._n_timesteps} steps") - out.append(f"start time: {self._start_time}") - - return str.join("\n", out) + self._read_dfs3_header() + self._validate_no_orientation_in_geo() + origin, orientation = self._origin_and_orientation_in_CRS() + + self._geometry = Grid3D( + x0=self._x0, + dx=self._dx, + nx=self._nx, + y0=self._y0, + dy=self._dy, + ny=self._ny, + z0=self._z0, + dz=self._dz, + nz=self._nz, + origin=origin, + projection=self._projstr, + orientation=orientation, + ) - def _read_dfs3_header(self, read_x0y0z0: bool = False): + def _read_dfs3_header(self, read_x0y0z0: bool = False) -> None: self._dfs = DfsFileFactory.Dfs3FileOpen(self._filename) self._source = self._dfs @@ -177,17 +145,17 @@ def _read_dfs3_header(self, read_x0y0z0: bool = False): self._nx = self._dfs.SpatialAxis.XCount self._ny = self._dfs.SpatialAxis.YCount self._nz = self._dfs.SpatialAxis.ZCount - self._read_header() + self._read_header(self._filename) def read( self, *, - items=None, - time=None, - area=None, - layers=None, - keepdims=False, - dtype=np.float32, + items: str | int | Sequence[str | int] | None = None, + time: int | str | slice | None = None, + area: Tuple[float, float, float, float] | None = None, + layers: str | int | Sequence[int] | None = None, + keepdims: bool = False, + dtype: Any = np.float32, ) -> Dataset: """ Read data from a dfs3 file @@ -300,75 +268,6 @@ def read( validate=False, ) - def write( - self, - filename, - data, - dt=None, - dx=None, - dy=None, - dz=None, - coordinate=None, - title=None, - ): - """ - Create a dfs3 file - - Parameters - ---------- - - filename: str - Location to write the dfs3 file - data: Dataset - list of matrices, one for each item. Matrix dimension: time, y, x - dt: float, optional - The time step in seconds. - dx: float, optional - length of each grid in the x direction (projection units) - dy: float, optional - length of each grid in the y direction (projection units) - dz: float, optional - length of each grid in the z direction (projection units) - coordinate: - list of [projection, origin_x, origin_y, orientation] - e.g. ['LONG/LAT', 12.4387, 55.2257, 327] - title: str, optional - title of the dfs3 file. Default is blank. - keep_open: bool, optional - Keep file open for appending - """ - - if isinstance(data, list): - raise TypeError( - "supplying data as a list of numpy arrays is deprecated, please supply data in the form of a Dataset" - ) - - filename = str(filename) - - self._builder = DfsBuilder.Create(title, "mikeio", __dfs_version__) - if not self._dx: - self._dx = 1 - if dx: - self._dx = dx - - if not self._dy: - self._dy = 1 - if dy: - self._dy = dy - - if not self._dz: - self._dz = 1 - if dz: - self._dz = dz - - self._write( - filename=filename, - ds=data, - dt=dt, - coordinate=coordinate, - title=title, - ) - def _set_spatial_axis(self): self._builder.SetSpatialAxis( self._factory.CreateAxisEqD3( @@ -398,6 +297,10 @@ def _get_bottom_values(data): return b + @property + def geometry(self) -> Grid3D: + return self._geometry + @property def dx(self): """Step size in x direction""" diff --git a/tests/test_dfs2.py b/tests/test_dfs2.py index e9a89f3d4..35ed83f44 100644 --- a/tests/test_dfs2.py +++ b/tests/test_dfs2.py @@ -8,7 +8,6 @@ import mikeio -from mikeio import Dfs2 from mikeio import EUMType, ItemInfo, EUMUnit from mikeio.exceptions import ItemsError from mikeio.spatial import GeometryPoint2D, Grid2D @@ -457,7 +456,7 @@ def test_repr(dfs2_gebco): assert "Dfs2" in text assert "items" in text - assert "dx" in text + # assert "dx" in text def test_repr_time(dfs2_random): @@ -467,7 +466,7 @@ def test_repr_time(dfs2_random): assert "Dfs2" in text assert "items" in text - assert "dx" in text + # assert "dx" in text assert "steps" in text From b3491bdb7e89d1798023312333922fd0896bee19 Mon Sep 17 00:00:00 2001 From: Henrik Andersson Date: Thu, 29 Feb 2024 22:49:42 +0100 Subject: [PATCH 3/8] Inline read_header --- mikeio/dfs/_dfs.py | 79 +++++++++++++------------------- mikeio/dfs/_dfs1.py | 5 -- mikeio/dfs/_dfs2.py | 7 --- mikeio/dfs/_dfs3.py | 1 - mikeio/spatial/_grid_geometry.py | 2 +- 5 files changed, 34 insertions(+), 60 deletions(-) diff --git a/mikeio/dfs/_dfs.py b/mikeio/dfs/_dfs.py index cb529f091..f234bf765 100644 --- a/mikeio/dfs/_dfs.py +++ b/mikeio/dfs/_dfs.py @@ -1,4 +1,5 @@ from __future__ import annotations +from pathlib import Path import warnings from abc import abstractmethod from dataclasses import dataclass @@ -18,9 +19,8 @@ from mikecore.Projections import Cartography from ..dataset import Dataset -from ..eum import EUMType, EUMUnit, ItemInfo, ItemInfoList, TimeStepUnit +from ..eum import EUMType, EUMUnit, ItemInfo, ItemInfoList from ..exceptions import ItemsError -from ..spatial import GeometryUndefined from .._time import DateTimeSelector @@ -269,23 +269,41 @@ class _Dfs123: show_progress = False - # TODO add all common arguments def __init__(self, filename=None): + path = Path(filename) + if not path.exists(): + raise FileNotFoundError(path) self._filename = str(filename) if filename else None - self._projstr = None - self._start_time = None self._end_time = None self._is_equidistant = True - self._items = None - self._builder = None - self._factory = None - self._deletevalue = None - self._override_coordinates = False - self._timeseries_unit = TimeStepUnit.SECOND - self._dt = None - self._geometry = GeometryUndefined() - self._dfs = None - self._source = None + self._geometry = None # handled by subclass + dfs = DfsFileFactory.DfsGenericOpen(self._filename) + self._dfs = dfs + self._n_items = len(dfs.ItemInfo) + self._items = self._get_item_info(list(range(self._n_items))) + self._timeaxistype = dfs.FileInfo.TimeAxis.TimeAxisType + if self._timeaxistype in { + TimeAxisType.CalendarEquidistant, + TimeAxisType.CalendarNonEquidistant, + }: + self._start_time = dfs.FileInfo.TimeAxis.StartDateTime + else: # relative time axis + self._start_time = datetime( + 1970, 1, 1 + ) # TODO is this the proper epoch, should this magic number be somewhere else? + if hasattr(dfs.FileInfo.TimeAxis, "TimeStep"): + self._timestep_in_seconds = ( + dfs.FileInfo.TimeAxis.TimeStep + ) # TODO handle other timeunits + self._n_timesteps = dfs.FileInfo.TimeAxis.NumberOfTimeSteps + projstr = dfs.FileInfo.Projection.WKTString + self._projstr = "NON-UTM" if not projstr else projstr + self._longitude = dfs.FileInfo.Projection.Longitude + self._latitude = dfs.FileInfo.Projection.Latitude + self._orientation = dfs.FileInfo.Projection.Orientation + self._deletevalue = dfs.FileInfo.DeleteValueFloat + + dfs.Close() def __repr__(self): name = self.__class__.__name__ @@ -385,37 +403,6 @@ def read( self._dfs.Close() return Dataset(data_list, time, items, geometry=self.geometry, validate=False) - def _read_header(self, filename: str) -> None: - dfs = DfsFileFactory.DfsGenericOpen(filename) - # TODO remove _dfs attribute - self._dfs = dfs - self._n_items = len(dfs.ItemInfo) - self._items = self._get_item_info(list(range(self._n_items))) - self._timeaxistype = dfs.FileInfo.TimeAxis.TimeAxisType - if self._timeaxistype in { - TimeAxisType.CalendarEquidistant, - TimeAxisType.CalendarNonEquidistant, - }: - self._start_time = dfs.FileInfo.TimeAxis.StartDateTime - else: # relative time axis - self._start_time = datetime( - 1970, 1, 1 - ) # TODO is this the proper epoch, should this magic number be somewhere else? - if hasattr(dfs.FileInfo.TimeAxis, "TimeStep"): - self._timestep_in_seconds = ( - dfs.FileInfo.TimeAxis.TimeStep - ) # TODO handle other timeunits - # TODO to get the EndTime - self._n_timesteps = dfs.FileInfo.TimeAxis.NumberOfTimeSteps - projstr = dfs.FileInfo.Projection.WKTString - self._projstr = "NON-UTM" if not projstr else projstr - self._longitude = dfs.FileInfo.Projection.Longitude - self._latitude = dfs.FileInfo.Projection.Latitude - self._orientation = dfs.FileInfo.Projection.Orientation - self._deletevalue = dfs.FileInfo.DeleteValueFloat - - dfs.Close() - def _open(self): raise NotImplementedError("Should be implemented by subclass") diff --git a/mikeio/dfs/_dfs1.py b/mikeio/dfs/_dfs1.py index 370697828..c35b905e5 100644 --- a/mikeio/dfs/_dfs1.py +++ b/mikeio/dfs/_dfs1.py @@ -70,17 +70,12 @@ class Dfs1(_Dfs123): def __init__(self, filename): super().__init__(filename) - path = Path(filename) - if not path.exists(): - raise FileNotFoundError(path) self._dfs = DfsFileFactory.Dfs1FileOpen(str(filename)) self._x0 = self._dfs.SpatialAxis.X0 self._dx = self._dfs.SpatialAxis.Dx self._nx = self._dfs.SpatialAxis.XCount - self._read_header(str(filename)) - origin = self._longitude, self._latitude self._geometry = Grid1D( x0=self._x0, diff --git a/mikeio/dfs/_dfs2.py b/mikeio/dfs/_dfs2.py index 8b04cc7ed..adb13a437 100644 --- a/mikeio/dfs/_dfs2.py +++ b/mikeio/dfs/_dfs2.py @@ -116,13 +116,6 @@ def __init__( filename = str(filename) super().__init__(filename) - # TODO move to base class - self._read_header(filename) - - # TODO - # self._x0 = 0.0 - # self._y0 = 0.0 - is_spectral = type == "spectral" dfs = DfsFileFactory.Dfs2FileOpen(str(filename)) diff --git a/mikeio/dfs/_dfs3.py b/mikeio/dfs/_dfs3.py index 80752ba9e..1c3fe34c1 100644 --- a/mikeio/dfs/_dfs3.py +++ b/mikeio/dfs/_dfs3.py @@ -145,7 +145,6 @@ def _read_dfs3_header(self, read_x0y0z0: bool = False) -> None: self._nx = self._dfs.SpatialAxis.XCount self._ny = self._dfs.SpatialAxis.YCount self._nz = self._dfs.SpatialAxis.ZCount - self._read_header(self._filename) def read( self, diff --git a/mikeio/spatial/_grid_geometry.py b/mikeio/spatial/_grid_geometry.py index 8769a894b..03c0111a3 100644 --- a/mikeio/spatial/_grid_geometry.py +++ b/mikeio/spatial/_grid_geometry.py @@ -42,7 +42,7 @@ def _parse_grid_axis(name, x, x0=0.0, dx=None, nx=None): return x0, dx, nx -def _print_axis_txt(name, x, dx) -> str: +def _print_axis_txt(name: str, x: Sequence[float], dx: float) -> str: n = len(x) txt = f"{name}: [{x[0]:0.4g}" if n > 1: From 3742804fb03789ae305f6faa43da5e145cece350 Mon Sep 17 00:00:00 2001 From: Henrik Andersson Date: Thu, 29 Feb 2024 23:27:45 +0100 Subject: [PATCH 4/8] Remove some attribute --- mikeio/dfs/_dfs.py | 57 ++++++++++++++++++++++++++-------------------- tests/test_dfs2.py | 2 +- 2 files changed, 33 insertions(+), 26 deletions(-) diff --git a/mikeio/dfs/_dfs.py b/mikeio/dfs/_dfs.py index f234bf765..838fbea73 100644 --- a/mikeio/dfs/_dfs.py +++ b/mikeio/dfs/_dfs.py @@ -3,7 +3,7 @@ import warnings from abc import abstractmethod from dataclasses import dataclass -from datetime import datetime, timedelta +from datetime import datetime from typing import Any, List, Tuple, Sequence import numpy as np import pandas as pd @@ -279,10 +279,9 @@ def __init__(self, filename=None): self._geometry = None # handled by subclass dfs = DfsFileFactory.DfsGenericOpen(self._filename) self._dfs = dfs - self._n_items = len(dfs.ItemInfo) - self._items = self._get_item_info(list(range(self._n_items))) - self._timeaxistype = dfs.FileInfo.TimeAxis.TimeAxisType - if self._timeaxistype in { + n_items = len(dfs.ItemInfo) + self._items = self._get_item_info(list(range(n_items))) + if dfs.FileInfo.TimeAxis.TimeAxisType in { TimeAxisType.CalendarEquidistant, TimeAxisType.CalendarNonEquidistant, }: @@ -291,10 +290,24 @@ def __init__(self, filename=None): self._start_time = datetime( 1970, 1, 1 ) # TODO is this the proper epoch, should this magic number be somewhere else? + if hasattr(dfs.FileInfo.TimeAxis, "TimeStep"): - self._timestep_in_seconds = ( - dfs.FileInfo.TimeAxis.TimeStep + self._timestep = ( + # some files have dt = 0 😳 + dfs.FileInfo.TimeAxis.TimeStepInSeconds() + if dfs.FileInfo.TimeAxis.TimeStepInSeconds() > 0 + else 1 ) # TODO handle other timeunits + + freq = pd.Timedelta(seconds=self._timestep) + self._time = pd.date_range( + start=self._start_time, + periods=dfs.FileInfo.TimeAxis.NumberOfTimeSteps, + freq=freq, + ) + else: + self._timestep = None + self._time = None self._n_timesteps = dfs.FileInfo.TimeAxis.NumberOfTimeSteps projstr = dfs.FileInfo.Projection.WKTString self._projstr = "NON-UTM" if not projstr else projstr @@ -310,12 +323,12 @@ def __repr__(self): out = [f""] out.append(f"geometry: {self.geometry}") - if self._n_items < 10: + if self.n_items < 10: out.append("items:") for i, item in enumerate(self.items): out.append(f" {i}: {item}") else: - out.append(f"number of items: {self._n_items}") + out.append(f"number of items: {self.n_items}") if self._n_timesteps == 1: out.append("time: time-invariant file (1 step)") @@ -438,27 +451,31 @@ def geometry(self) -> Any: return self._geometry @property - def deletevalue(self): + def deletevalue(self) -> float: "File delete value" return self._deletevalue @property - def n_items(self): + def n_items(self) -> int: "Number of items" - return self._n_items + return len(self.items) @property - def items(self): + def items(self) -> List[ItemInfo]: "List of items" return self._items @property - def start_time(self): + def time(self) -> pd.DatetimeIndex | None: + return self._time + + @property + def start_time(self) -> pd.Timestamp: """File start time""" return self._start_time @property - def end_time(self): + def end_time(self) -> pd.Timestamp: """File end time""" if self._end_time is None: self._end_time = self.read(items=[0]).time[-1].to_pydatetime() @@ -476,16 +493,6 @@ def timestep(self) -> float: # this will fail if the TimeAxisType is not calendar and equidistant, but that is ok return self._dfs.FileInfo.TimeAxis.TimeStepInSeconds() - @property - def time(self) -> pd.DatetimeIndex: - """File all datetimes""" - # this will fail if the TimeAxisType is not calendar and equidistant, but that is ok - if not self._is_equidistant: - raise NotImplementedError("Not implemented for non-equidistant files") - - dt = timedelta(seconds=self.timestep) - return pd.date_range(start=self.start_time, periods=self.n_timesteps, freq=dt) - @property def projection_string(self): return self._projstr diff --git a/tests/test_dfs2.py b/tests/test_dfs2.py index 35ed83f44..a8a56ae30 100644 --- a/tests/test_dfs2.py +++ b/tests/test_dfs2.py @@ -727,7 +727,7 @@ def dfs2_props_to_list(d): d._n_timesteps, d._start_time, d._dfs.FileInfo.TimeAxis.TimeAxisType, - d._n_items, + d.n_items, # d._deletevalue, ] From 60d8ada00c2041e627fdba33c50cf17b9a81a20c Mon Sep 17 00:00:00 2001 From: Henrik Andersson Date: Thu, 29 Feb 2024 23:37:57 +0100 Subject: [PATCH 5/8] Type hints --- mikeio/dfs/_dfs.py | 32 +++++++++++++++++--------------- mikeio/dfs/_dfs1.py | 4 ++-- mikeio/dfs/_dfs2.py | 4 ++-- mikeio/dfs/_dfs3.py | 4 ++-- 4 files changed, 23 insertions(+), 21 deletions(-) diff --git a/mikeio/dfs/_dfs.py b/mikeio/dfs/_dfs.py index 838fbea73..c210b75c5 100644 --- a/mikeio/dfs/_dfs.py +++ b/mikeio/dfs/_dfs.py @@ -36,17 +36,17 @@ class DfsHeader: def _read_item_time_step( *, - dfs, - filename, - time, - item_numbers, - deletevalue, - shape, - item, - it, - error_bad_data=True, - fill_bad_data_value=np.nan, -): + dfs: DfsFile, + filename: str, + time: pd.DatetimeIndex, + item_numbers: List[int], + deletevalue: float, + shape: Tuple[int, ...], + item: int, + it: int, + error_bad_data: bool = True, + fill_bad_data_value: float = np.nan, +) -> Tuple[DfsFile, np.ndarray]: itemdata = dfs.ReadItemTimeStep(item_numbers[item] + 1, it) if itemdata is not None: d = itemdata.Data @@ -65,7 +65,7 @@ def _read_item_time_step( def _fuzzy_item_search( *, dfsItemInfo: List[DfsDynamicItemInfo], search: str, start_idx: int = 0 -): +) -> List[int]: import fnmatch names = [info.Name for info in dfsItemInfo] @@ -123,7 +123,9 @@ def _valid_item_numbers( return item_numbers -def _valid_timesteps(dfsFileInfo: DfsFileInfo, time_steps) -> Tuple[bool, List[int]]: +def _valid_timesteps( + dfsFileInfo: DfsFileInfo, time_steps: int | Sequence[int] | str | slice | None +) -> Tuple[bool, List[int]]: time_axis = dfsFileInfo.TimeAxis single_time_selected = False @@ -176,7 +178,7 @@ def _valid_timesteps(dfsFileInfo: DfsFileInfo, time_steps) -> Tuple[bool, List[i def _item_numbers_by_name( - dfsItemInfo, item_names: List[str], ignore_first: bool = False + dfsItemInfo: DfsDynamicItemInfo, item_names: List[str], ignore_first: bool = False ) -> List[int]: """Utility function to find item numbers @@ -239,7 +241,7 @@ def _get_item_info( return ItemInfoList(items) -def _write_dfs_data(*, dfs: DfsFile, ds: Dataset, n_spatial_dims: int) -> None: +def write_dfs_data(*, dfs: DfsFile, ds: Dataset, n_spatial_dims: int) -> None: deletevalue = dfs.FileInfo.DeleteValueFloat # ds.deletevalue has_no_time = "time" not in ds.dims if ds.is_equidistant: diff --git a/mikeio/dfs/_dfs1.py b/mikeio/dfs/_dfs1.py index c35b905e5..377c3950c 100644 --- a/mikeio/dfs/_dfs1.py +++ b/mikeio/dfs/_dfs1.py @@ -10,7 +10,7 @@ from ..dataset import Dataset from ._dfs import ( _Dfs123, - _write_dfs_data, + write_dfs_data, ) from ..eum import TimeStepUnit from ..spatial import Grid1D @@ -18,7 +18,7 @@ def write_dfs1(filename: str | Path, ds: Dataset, title="") -> None: dfs = _write_dfs1_header(filename, ds, title) - _write_dfs_data(dfs=dfs, ds=ds, n_spatial_dims=1) + write_dfs_data(dfs=dfs, ds=ds, n_spatial_dims=1) def _write_dfs1_header(filename: str | Path, ds: Dataset, title="") -> DfsFile: diff --git a/mikeio/dfs/_dfs2.py b/mikeio/dfs/_dfs2.py index adb13a437..571a136e5 100644 --- a/mikeio/dfs/_dfs2.py +++ b/mikeio/dfs/_dfs2.py @@ -21,7 +21,7 @@ _get_item_info, _valid_item_numbers, _valid_timesteps, - _write_dfs_data, + write_dfs_data, ) from ..eum import TimeStepUnit from ..spatial import Grid2D @@ -29,7 +29,7 @@ def write_dfs2(filename: str | Path, ds: Dataset, title: str = "") -> None: dfs = _write_dfs2_header(filename, ds, title) - _write_dfs_data(dfs=dfs, ds=ds, n_spatial_dims=2) + write_dfs_data(dfs=dfs, ds=ds, n_spatial_dims=2) def _write_dfs2_header(filename: str | Path, ds: Dataset, title: str = "") -> DfsFile: diff --git a/mikeio/dfs/_dfs3.py b/mikeio/dfs/_dfs3.py index 1c3fe34c1..b5e0ba0d1 100644 --- a/mikeio/dfs/_dfs3.py +++ b/mikeio/dfs/_dfs3.py @@ -20,7 +20,7 @@ _get_item_info, _valid_item_numbers, _valid_timesteps, - _write_dfs_data, + write_dfs_data, ) from ..eum import TimeStepUnit from ..spatial import Grid3D @@ -28,7 +28,7 @@ def write_dfs3(filename: str | Path, ds: Dataset, title: str = "") -> None: dfs = _write_dfs3_header(filename, ds, title) - _write_dfs_data(dfs=dfs, ds=ds, n_spatial_dims=3) + write_dfs_data(dfs=dfs, ds=ds, n_spatial_dims=3) def _write_dfs3_header(filename: str | Path, ds: Dataset, title: str) -> DfsFile: From 201dba91740ea4d42afde6345591bd2d40726a59 Mon Sep 17 00:00:00 2001 From: Henrik Andersson Date: Thu, 29 Feb 2024 23:41:55 +0100 Subject: [PATCH 6/8] Type hints --- mikeio/dfs/_dfs.py | 5 +++++ mikeio/dfs/_dfs1.py | 4 ++-- mikeio/dfs/_dfs2.py | 5 ----- mikeio/dfs/_dfs3.py | 25 ++++++++++--------------- 4 files changed, 17 insertions(+), 22 deletions(-) diff --git a/mikeio/dfs/_dfs.py b/mikeio/dfs/_dfs.py index c210b75c5..68c2435ff 100644 --- a/mikeio/dfs/_dfs.py +++ b/mikeio/dfs/_dfs.py @@ -519,6 +519,11 @@ def orientation(self): """Orientation (in own CRS)""" return self.geometry.orientation + @property + def is_geo(self): + """Are coordinates geographical (LONG/LAT)?""" + return self._projstr == "LONG/LAT" + @property @abstractmethod def shape(self) -> Tuple[int, ...]: diff --git a/mikeio/dfs/_dfs1.py b/mikeio/dfs/_dfs1.py index 377c3950c..afaf1e9ae 100644 --- a/mikeio/dfs/_dfs1.py +++ b/mikeio/dfs/_dfs1.py @@ -16,12 +16,12 @@ from ..spatial import Grid1D -def write_dfs1(filename: str | Path, ds: Dataset, title="") -> None: +def write_dfs1(filename: str | Path, ds: Dataset, title: str = "") -> None: dfs = _write_dfs1_header(filename, ds, title) write_dfs_data(dfs=dfs, ds=ds, n_spatial_dims=1) -def _write_dfs1_header(filename: str | Path, ds: Dataset, title="") -> DfsFile: +def _write_dfs1_header(filename: str | Path, ds: Dataset, title: str) -> DfsFile: builder = DfsBuilder.Create(title, "mikeio", __dfs_version__) builder.SetDataType(0) diff --git a/mikeio/dfs/_dfs2.py b/mikeio/dfs/_dfs2.py index 571a136e5..b25a81269 100644 --- a/mikeio/dfs/_dfs2.py +++ b/mikeio/dfs/_dfs2.py @@ -294,8 +294,3 @@ def nx(self): def ny(self): """Number of values in the y-direction""" return self.geometry.ny - - @property - def is_geo(self): - """Are coordinates geographical (LONG/LAT)?""" - return self._projstr == "LONG/LAT" diff --git a/mikeio/dfs/_dfs3.py b/mikeio/dfs/_dfs3.py index b5e0ba0d1..aec5522cf 100644 --- a/mikeio/dfs/_dfs3.py +++ b/mikeio/dfs/_dfs3.py @@ -185,19 +185,19 @@ def read( # if keepdims is not False: # return NotImplementedError("keepdims is not yet implemented for Dfs3") - # Open the dfs file for reading dfs = DfsFileFactory.DfsGenericOpen(self._filename) item_numbers = _valid_item_numbers(dfs.ItemInfo, items) n_items = len(item_numbers) - single_time_selected, time_steps = _valid_timesteps(dfs.FileInfo, time) + single_time_selected, time_steps = _valid_timesteps( + dfs.FileInfo, time_steps=time + ) nt = len(time_steps) if not single_time_selected else 1 - # Determine the size of the grid - zNum = self.geometry.nz - yNum = self.geometry.ny - xNum = self.geometry.nx + nz = self.geometry.nz + ny = self.geometry.ny + nx = self.geometry.nx deleteValue = dfs.FileInfo.DeleteValueFloat data_list = [] @@ -209,15 +209,15 @@ def read( dims: Tuple[str, ...] shape: Tuple[int, ...] - nz = zNum if layers is None else len(layers) + nz = nz if layers is None else len(layers) if nz == 1 and (not keepdims): geometry = self.geometry._geometry_for_layers([0]) dims = ("time", "y", "x") - shape = (nt, yNum, xNum) + shape = (nt, ny, nx) else: geometry = self.geometry._geometry_for_layers(layers, keepdims) dims = ("time", "z", "y", "x") - shape = (nt, nz, yNum, xNum) + shape = (nt, nz, ny, nx) for item in range(n_items): data: np.ndarray = np.ndarray(shape=shape, dtype=dtype) @@ -234,7 +234,7 @@ def read( itemdata = dfs.ReadItemTimeStep(item_numbers[item] + 1, int(it)) d = itemdata.Data - d = d.reshape(zNum, yNum, xNum) + d = d.reshape(nz, ny, nx) d[d == deleteValue] = np.nan if layers is None: @@ -318,8 +318,3 @@ def dz(self): @property def shape(self): return (self._n_timesteps, self._nz, self._ny, self._nx) - - @property - def is_geo(self): - """Are coordinates geographical (LONG/LAT)?""" - return self._projstr == "LONG/LAT" From 4f0fb9aae9559c7460a9b70fddc4a9c2a9cad97b Mon Sep 17 00:00:00 2001 From: Henrik Andersson Date: Fri, 1 Mar 2024 00:29:05 +0100 Subject: [PATCH 7/8] Fix dfs3 layers --- mikeio/dfs/_dfs3.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mikeio/dfs/_dfs3.py b/mikeio/dfs/_dfs3.py index aec5522cf..a34e9e315 100644 --- a/mikeio/dfs/_dfs3.py +++ b/mikeio/dfs/_dfs3.py @@ -209,15 +209,15 @@ def read( dims: Tuple[str, ...] shape: Tuple[int, ...] - nz = nz if layers is None else len(layers) - if nz == 1 and (not keepdims): + nzl = nz if layers is None else len(layers) + if nzl == 1 and (not keepdims): geometry = self.geometry._geometry_for_layers([0]) dims = ("time", "y", "x") shape = (nt, ny, nx) else: geometry = self.geometry._geometry_for_layers(layers, keepdims) dims = ("time", "z", "y", "x") - shape = (nt, nz, ny, nx) + shape = (nt, nzl, ny, nx) for item in range(n_items): data: np.ndarray = np.ndarray(shape=shape, dtype=dtype) From e2ac907cab03260e40961b340934055ebab49c07 Mon Sep 17 00:00:00 2001 From: Henrik Andersson Date: Fri, 1 Mar 2024 00:40:37 +0100 Subject: [PATCH 8/8] Dfs0 --- mikeio/dfs/_dfs0.py | 239 ++++++++------------------------------------ 1 file changed, 44 insertions(+), 195 deletions(-) diff --git a/mikeio/dfs/_dfs0.py b/mikeio/dfs/_dfs0.py index c8b0fc3ac..824b1f834 100644 --- a/mikeio/dfs/_dfs0.py +++ b/mikeio/dfs/_dfs0.py @@ -1,16 +1,15 @@ from __future__ import annotations from functools import cached_property from pathlib import Path -import warnings from datetime import datetime, timedelta from typing import Sequence import numpy as np import pandas as pd -from mikecore.DfsFactory import DfsBuilder, DfsFactory # type: ignore -from mikecore.DfsFile import DataValueType, DfsSimpleType, StatType, TimeAxisType # type: ignore -from mikecore.DfsFileFactory import DfsFileFactory # type: ignore -from mikecore.eum import eumQuantity # type: ignore +from mikecore.DfsFactory import DfsBuilder, DfsFactory +from mikecore.DfsFile import DataValueType, DfsSimpleType, StatType, TimeAxisType +from mikecore.DfsFileFactory import DfsFileFactory +from mikecore.eum import eumQuantity from .. import __dfs_version__ from ..dataset import Dataset, DataArray @@ -18,7 +17,12 @@ from ..eum import EUMType, EUMUnit, ItemInfo, TimeStepUnit -def _write_dfs0(filename, dataset: Dataset, title="", dtype=DfsSimpleType.Float): +def _write_dfs0( + filename: str | Path, + dataset: Dataset, + title: str = "", + dtype: DfsSimpleType = DfsSimpleType.Float, +) -> None: filename = str(filename) factory = DfsFactory() @@ -133,7 +137,12 @@ def __repr__(self): return str.join("\n", out) - def read(self, items=None, time=None, keepdims=False) -> Dataset: + def read( + self, + items: str | int | Sequence[str | int] | None = None, + time: int | str | slice | None = None, + keepdims: bool = False, + ) -> Dataset: """ Read data from a dfs0 file. @@ -154,8 +163,7 @@ def read(self, items=None, time=None, keepdims=False) -> Dataset: raise FileNotFoundError(f"File {path} not found") # read data from file - fdata, ftime, fitems = self.__read(self._filename) - self._source = self._dfs + fdata, ftime, fitems = self._read(self._filename) dfs = self._dfs # select items @@ -197,7 +205,7 @@ def read(self, items=None, time=None, keepdims=False) -> Dataset: return ds - def __read(self, filename): + def _read(self, filename): """ Read all data from a dfs0 file. """ @@ -243,170 +251,9 @@ def _to_dfs_datatype(dtype): raise TypeError("Dfs files only support float or double") - @staticmethod - def _setup_header( - title: str, - filename: str, - start_time, - dt: float, - is_equidistant: bool, - dtype, - items: Sequence[ItemInfo], - ): - factory = DfsFactory() - builder = DfsBuilder.Create(title, "mikeio", __dfs_version__) - builder.SetDataType(1) - builder.SetGeographicalProjection(factory.CreateProjectionUndefined()) - - if is_equidistant: - temporal_axis = factory.CreateTemporalEqCalendarAxis( - TimeStepUnit.SECOND, start_time, 0, dt - ) - else: - temporal_axis = factory.CreateTemporalNonEqCalendarAxis( - TimeStepUnit.SECOND, start_time - ) - - builder.SetTemporalAxis(temporal_axis) - builder.SetItemStatisticsType(StatType.RegularStat) - - dtype_dfs = Dfs0._to_dfs_datatype(dtype) - - for item in items: - newitem = builder.CreateDynamicItemBuilder() - quantity = eumQuantity.Create(item.type, item.unit) - newitem.Set( - item.name, - quantity, - dtype_dfs, - ) - - if item.data_value_type is not None: - newitem.SetValueType(item.data_value_type) - else: - newitem.SetValueType(DataValueType.Instantaneous) - - newitem.SetAxis(factory.CreateAxisEqD0()) - builder.AddDynamicItem(newitem.GetDynamicItemInfo()) - - try: - builder.CreateFile(filename) - except IOError: - raise IOError(f"Cannot create dfs0 file: {filename}") - - return builder.GetFile() - - def write( - self, - filename, - data, - start_time=None, - dt=None, - datetimes=None, - items=None, - title="", - dtype=None, - ): - """ - Create a dfs0 file. - - Parameters - ---------- - filename: str - Full path and filename to dfs0 to be created. - data: list[np.array] - values - start_time: datetime.datetime, , optional - start date of type datetime. - dt: float, optional - the time step. Therefore dt of 5.5 with timeseries_unit of minutes - means 5 mins and 30 seconds. default to 1.0 - datetimes: list[datetime] - items: list[ItemInfo], optional - List of ItemInfo corresponding to a variable types (ie. Water Level). - title: str, optional - title, default blank - dtype : np.dtype, optional - default np.float32 - - """ - self._filename = str(filename) - self._title = title - self._dtype = dtype - - if isinstance(data, Dataset): - self._items = data.items - - if data.is_equidistant: - self._start_time = data.time[0] - self._dt = (data.time[1] - data.time[0]).total_seconds() - else: - datetimes = data.time - data = data.to_numpy() - elif datetimes is not None: - datetimes = pd.DatetimeIndex(datetimes, freq="infer") - - if dt: - self._dt = dt - - if self._dt is None: - self._dt = 1.0 - - if start_time: - self._start_time = start_time - - self._n_items = len(data) - self._n_timesteps = np.shape(data[0])[0] - - if items: - self._items = items - - if self._items is None: - warnings.warn("No items info supplied. Using Item 1, 2, 3,...") - self._items = [ItemInfo(f"Item {i + 1}") for i in range(self._n_items)] - - if len(self._items) != self._n_items: - raise ValueError("Number of items must match the number of data columns.") - - if datetimes is not None: - self._start_time = datetimes[0] - self._is_equidistant = False - t_seconds = (datetimes - datetimes[0]).total_seconds().values - else: - self._is_equidistant = True - if self._start_time is None: - self._start_time = datetime.now() - warnings.warn( - f"No start time supplied. Using current time: {self._start_time} as start time." - ) - - self._dt = float(self._dt) - t_seconds = self._dt * np.arange(float(self._n_timesteps)) - - dfs = self._setup_header( - title=self._title, - filename=self._filename, - dt=self._dt, - start_time=self._start_time, - is_equidistant=self._is_equidistant, - dtype=self._dtype, - items=self._items, - ) - - delete_value = dfs.FileInfo.DeleteValueFloat - - data = np.array(data, order="F").astype(np.float64).T - data[np.isnan(data)] = delete_value - data_to_write = np.concatenate([t_seconds.reshape(-1, 1), data], axis=1) - rc = dfs.WriteDfs0DataDouble(data_to_write) - if rc: - warnings.warn( - f"mikecore WriteDfs0DataDouble returned {rc}! Writing file probably failed." - ) - - dfs.Close() - - def to_dataframe(self, unit_in_name=False, round_time="ms") -> pd.DataFrame: + def to_dataframe( + self, unit_in_name: bool = False, round_time: str = "ms" + ) -> pd.DataFrame: """ Read data from the dfs0 file and return a Pandas DataFrame. @@ -420,7 +267,7 @@ def to_dataframe(self, unit_in_name=False, round_time="ms") -> pd.DataFrame: ------- pd.DataFrame """ - data, time, items = self.__read(self._filename) + data, time, items = self._read(self._filename) if unit_in_name: cols = [f"{item.name} ({item.unit.name})" for item in items] else: @@ -436,7 +283,13 @@ def to_dataframe(self, unit_in_name=False, round_time="ms") -> pd.DataFrame: return df @staticmethod - def from_dataframe(df, filename, itemtype=None, unit=None, items=None): + def from_dataframe( + df: pd.DataFrame, + filename: str, + itemtype: EUMType | None = None, + unit: EUMUnit | None = None, + items: Sequence[ItemInfo] | None = None, + ) -> None: """ Create a dfs0 from a pandas Dataframe @@ -457,27 +310,22 @@ def from_dataframe(df, filename, itemtype=None, unit=None, items=None): return dataframe_to_dfs0(df, filename, itemtype, unit, items) @property - def deletevalue(self): - """File delete value""" - return self._deletevalue - - @property - def n_items(self): + def n_items(self) -> int: """Number of items""" return self._n_items @property - def items(self): + def items(self) -> list[ItemInfo]: """List of items""" return self._items @property - def start_time(self): + def start_time(self) -> datetime: """File start time""" return self._start_time @cached_property - def end_time(self): + def end_time(self) -> datetime: if self._source.FileInfo.TimeAxis.IsEquidistant(): dt = self._source.FileInfo.TimeAxis.TimeStep @@ -489,32 +337,33 @@ def end_time(self): return self.start_time + timedelta(seconds=timespan) @property - def n_timesteps(self): + def n_timesteps(self) -> int: """Number of time steps""" return self._n_timesteps @property - def timestep(self): + def timestep(self) -> float: """Time step size in seconds""" if self._timeaxistype == TimeAxisType.CalendarEquidistant: return self._source.FileInfo.TimeAxis.TimeStep + else: + raise ValueError("Time axis type not supported") @property - def time(self): + def time(self) -> pd.DatetimeIndex: """File all datetimes""" if self._timeaxistype == TimeAxisType.CalendarEquidistant: - return pd.to_datetime( - [ - self.start_time + timedelta(seconds=i * self.timestep) - for i in range(self.n_timesteps) - ] + freq = pd.Timedelta(seconds=self.timestep) + return pd.date_range( + start=self.start_time, + periods=self.n_timesteps, + freq=freq, ) elif self._timeaxistype == TimeAxisType.CalendarNonEquidistant: return self.read().time - else: - return None + raise ValueError("Time axis type not supported") def series_to_dfs0(