From 845e5ba972bb38fd0218bb790cd6f0d3cfedf425 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 30 Nov 2022 19:50:23 +0100 Subject: [PATCH] Bump version --- CHANGELOG.md | 6 ++ openfisca_core/data_storage/_arrays.py | 15 ++++ openfisca_core/data_storage/_enums.py | 13 ++++ openfisca_core/data_storage/_files.py | 13 ++++ .../data_storage/in_memory_storage.py | 42 ++++++----- .../data_storage/on_disk_storage.py | 70 ++++++++++++------- openfisca_core/indexed_enums/enum_array.py | 6 +- openfisca_core/types/__init__.py | 6 ++ openfisca_core/types/_data.py | 13 ++++ openfisca_core/types/_domain.py | 18 +++++ openfisca_tasks/lint.mk | 1 - setup.py | 2 +- 12 files changed, 158 insertions(+), 47 deletions(-) create mode 100644 openfisca_core/data_storage/_arrays.py create mode 100644 openfisca_core/data_storage/_enums.py create mode 100644 openfisca_core/data_storage/_files.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 68d2f0bb38..f9af401deb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +### 35.12.1 [#1167](https://github.com/openfisca/openfisca-core/pull/1167) + +#### Technical changes + +- Add typing to `data_storage`. + ## 35.12.0 [#1160](https://github.com/openfisca/openfisca-core/pull/1160) #### New Features diff --git a/openfisca_core/data_storage/_arrays.py b/openfisca_core/data_storage/_arrays.py new file mode 100644 index 0000000000..b9fce0b775 --- /dev/null +++ b/openfisca_core/data_storage/_arrays.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from typing import Dict + +import collections +import dataclasses + +import numpy + +from openfisca_core import types + + +@dataclasses.dataclass(frozen = True) +class Arrays(collections.UserDict): + data: Dict[types.Period, numpy.ndarray] diff --git a/openfisca_core/data_storage/_enums.py b/openfisca_core/data_storage/_enums.py new file mode 100644 index 0000000000..4a735cfe13 --- /dev/null +++ b/openfisca_core/data_storage/_enums.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from typing import Dict, Type + +import collections +import dataclasses + +from openfisca_core import types + + +@dataclasses.dataclass(frozen = True) +class Enums(collections.UserDict): + data: Dict[str, Type[types.Enum]] diff --git a/openfisca_core/data_storage/_files.py b/openfisca_core/data_storage/_files.py new file mode 100644 index 0000000000..69377e117d --- /dev/null +++ b/openfisca_core/data_storage/_files.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from typing import Dict + +import collections +import dataclasses + +from openfisca_core import types + + +@dataclasses.dataclass(frozen = True) +class Files(collections.UserDict): + data: Dict[types.Period, str] diff --git a/openfisca_core/data_storage/in_memory_storage.py b/openfisca_core/data_storage/in_memory_storage.py index 70d4c288a3..efecbeb8e6 100644 --- a/openfisca_core/data_storage/in_memory_storage.py +++ b/openfisca_core/data_storage/in_memory_storage.py @@ -1,21 +1,23 @@ from __future__ import annotations -from typing import Any, Dict, KeysView, Optional +from typing import Any, Optional, Sequence import numpy from openfisca_core import periods, types +from ._arrays import Arrays + class InMemoryStorage: """ Low-level class responsible for storing and retrieving calculated vectors in memory """ - _arrays: Dict[types.Period, numpy.ndarray] + _arrays: Arrays def __init__(self, is_eternal: bool = False) -> None: - self._arrays = {} + self._arrays = Arrays({}) self.is_eternal = is_eternal def get(self, period: types.Period) -> Any: @@ -32,40 +34,46 @@ def get(self, period: types.Period) -> Any: def put(self, value: Any, period: types.Period) -> None: if self.is_eternal: period = periods.period(periods.ETERNITY) - period = periods.period(period) - self._arrays[period] = value + else: + period = periods.period(period) + + self._arrays = Arrays({period: value, **self._arrays}) def delete(self, period: Optional[types.Period] = None) -> None: if period is None: - self._arrays = {} - return + self._arrays = Arrays({}) + return None if self.is_eternal: period = periods.period(periods.ETERNITY) - period = periods.period(period) - self._arrays = { + else: + period = periods.period(period) + + self._arrays = Arrays({ period_item: value for period_item, value in self._arrays.items() if not period.contains(period_item) - } + }) - def get_known_periods(self) -> KeysView[types.Period]: - return self._arrays.keys() + def get_known_periods(self) -> Sequence[types.Period]: + return list(self._arrays.keys()) def get_memory_usage(self) -> types.MemoryUsage: if not self._arrays: - return dict( + return types.MemoryUsage( + cell_size = numpy.nan, nb_arrays = 0, total_nb_bytes = 0, - cell_size = numpy.nan, ) nb_arrays = len(self._arrays) array = next(iter(self._arrays.values())) - return dict( - nb_arrays = nb_arrays, - total_nb_bytes = array.nbytes * nb_arrays, + total = array.nbytes * nb_arrays + + return types.MemoryUsage( cell_size = array.itemsize, + nb_arrays = nb_arrays, + total_nb_bytes = total, ) diff --git a/openfisca_core/data_storage/on_disk_storage.py b/openfisca_core/data_storage/on_disk_storage.py index 37baee6e48..82174d3db6 100644 --- a/openfisca_core/data_storage/on_disk_storage.py +++ b/openfisca_core/data_storage/on_disk_storage.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Dict, KeysView, NoReturn, Optional, Type +from typing import Any, NoReturn, Optional, Sequence import os import shutil @@ -9,14 +9,32 @@ from openfisca_core import periods, indexed_enums as enums, types +from ._enums import Enums +from ._files import Files + class OnDiskStorage: - """ - Low-level class responsible for storing and retrieving calculated vectors on disk + """Class responsible for storing and retrieving calculated vectors on disk. + + Attributes: + _enums: ? + _files: ? + is_eternal: ? + storage_dir: Path to store calculated vectors. + preserve_storage_dir: ? + + Args: + storage_dir: Path to store calculated vectors. + is_eternal: ? + preserve_storage_dir: ? + """ - _files: Dict[types.Period, str] - _enums: Dict[str, Type[enums.Enum]] + _enums: Enums + _files: Files + is_eternal: bool + storage_dir: str + preserve_storage_dir: bool def __init__( self, @@ -24,20 +42,20 @@ def __init__( is_eternal: bool = False, preserve_storage_dir: bool = False, ) -> None: - self._files = {} - self._enums = {} + self._enums = Enums({}) + self._files = Files({}) self.is_eternal = is_eternal - self.preserve_storage_dir = preserve_storage_dir self.storage_dir = storage_dir + self.preserve_storage_dir = preserve_storage_dir def _decode_file(self, file: str) -> Any: - enum: Optional[Type[enums.Enum]] enum = self._enums.get(file) + load = numpy.load(file) - if enum is not None: - return enums.EnumArray(numpy.load(file), enum) - else: - return numpy.load(file) + if enum is None: + return load + + return enums.EnumArray(load, enum) def get(self, period: types.Period) -> Any: if self.is_eternal: @@ -53,19 +71,23 @@ def get(self, period: types.Period) -> Any: def put(self, value: Any, period: types.Period) -> None: if self.is_eternal: period = periods.period(periods.ETERNITY) - period = periods.period(period) + + else: + period = periods.period(period) filename = str(period) - path = os.path.join(self.storage_dir, filename) + '.npy' + path = os.path.join(self.storage_dir, filename) + ".npy" + if isinstance(value, enums.EnumArray): - self._enums[path] = value.possible_values + self._enums = Enums({path: value.possible_values, **self._enums}) value = value.view(numpy.ndarray) + numpy.save(path, value) - self._files[period] = path + self._files = Files({period: path, **self._files}) def delete(self, period: Optional[types.Period] = None) -> None: if period is None: - self._files = {} + self._files = Files({}) return if self.is_eternal: @@ -73,20 +95,20 @@ def delete(self, period: Optional[types.Period] = None) -> None: period = periods.period(period) if period is not None: - self._files = { + self._files = Files({ period_item: value for period_item, value in self._files.items() if not period.contains(period_item) - } + }) - def get_known_periods(self) -> KeysView[types.Period]: - return self._files.keys() + def get_known_periods(self) -> Sequence[types.Period]: + return list(self._files.keys()) def get_memory_usage(self) -> NoReturn: raise NotImplementedError def restore(self) -> None: - self._files = {} + self._files = Files({}) # Restore self._files from content of storage_dir. for filename in os.listdir(self.storage_dir): if not filename.endswith('.npy'): @@ -94,7 +116,7 @@ def restore(self) -> None: path = os.path.join(self.storage_dir, filename) filename_core = filename.rsplit('.', 1)[0] period = periods.period(filename_core) - self._files[period] = path + self._files = Files({period: path, **self._files}) def __del__(self) -> None: if self.preserve_storage_dir: diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index 6a77be57a7..2e62a3fbaa 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -1,12 +1,10 @@ from __future__ import annotations -import typing from typing import Any, NoReturn, Optional, Type import numpy -if typing.TYPE_CHECKING: - from openfisca_core.indexed_enums import Enum +from openfisca_core import types class EnumArray(numpy.ndarray): @@ -22,7 +20,7 @@ class EnumArray(numpy.ndarray): def __new__( cls, input_array: numpy.int_, - possible_values: Optional[Type[Enum]] = None, + possible_values: Optional[Type[types.Enum]] = None, ) -> EnumArray: obj = numpy.asarray(input_array).view(cls) obj.possible_values = possible_values diff --git a/openfisca_core/types/__init__.py b/openfisca_core/types/__init__.py index a780f17f32..e9bc64d08b 100644 --- a/openfisca_core/types/__init__.py +++ b/openfisca_core/types/__init__.py @@ -8,6 +8,8 @@ * :attr:`.Array` * ``ArrayLike`` * :attr:`.Cache` + * :attr:`.Enum` + * :attr:`.EnumArray` * :attr:`.Entity` * :attr:`.Formula` * :attr:`.Holder` @@ -54,6 +56,8 @@ from ._data import ( # noqa: F401 Array, ArrayLike, + Enum, + EnumArray, Instant, MemoryUsage, Period, @@ -80,6 +84,8 @@ "Array", "ArrayLike", "Entity", + "Enum", + "EnumArray", "Formula", "Holder", "Instant", diff --git a/openfisca_core/types/_data.py b/openfisca_core/types/_data.py index f64cd3da63..a8a1fbd8fd 100644 --- a/openfisca_core/types/_data.py +++ b/openfisca_core/types/_data.py @@ -12,6 +12,11 @@ >>> this == that True + >>> that = periods.Instant((1234, 7, 8)) + >>> this == that + False + + """ from __future__ import annotations @@ -72,6 +77,14 @@ """ +class Enum(Protocol): + """Enum protocol.""" + + +class EnumArray(Protocol): + """EnumArray protocol.""" + + class Instant(Protocol): """Instant protocol.""" diff --git a/openfisca_core/types/_domain.py b/openfisca_core/types/_domain.py index 165fa5926c..09a3474714 100644 --- a/openfisca_core/types/_domain.py +++ b/openfisca_core/types/_domain.py @@ -6,6 +6,9 @@ if the data they hold is different, they are equal but not fungible. Examples: + If we take entities, they are equal as long as they share the same ``key``. + Let's take the following example: + >>> from openfisca_core import entities >>> this = entities.Entity(1, "a", "b", "c") @@ -13,6 +16,21 @@ >>> this == that True + As you can see, ``this`` and ``that`` are equal because they share the same + ``key``: + + >>> this.key == that.key + True + + The opposite is also true: + + >>> that = entities.Entity(2, "a", "b", "c") + >>> this == that + False + + >>> this.key == that.key + False + """ from __future__ import annotations diff --git a/openfisca_tasks/lint.mk b/openfisca_tasks/lint.mk index a6a7ea5078..115c6267bb 100644 --- a/openfisca_tasks/lint.mk +++ b/openfisca_tasks/lint.mk @@ -42,7 +42,6 @@ check-types: ## Run static type checkers for type errors (strict). lint-typing-strict: \ lint-typing-strict-commons \ - lint-typing-strict-data_storage \ lint-typing-strict-types \ ; diff --git a/setup.py b/setup.py index 0eb2fa5624..fec13a6850 100644 --- a/setup.py +++ b/setup.py @@ -62,7 +62,7 @@ setup( name = 'OpenFisca-Core', - version = '35.12.0', + version = '35.12.1', author = 'OpenFisca Team', author_email = 'contact@openfisca.org', classifiers = [