From 7820c27fa70da207cf689ef3fb51f1a043aea572 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Fri, 9 Dec 2022 20:29:42 +0100 Subject: [PATCH] Complete OnDiskStorage doc --- openfisca_core/data_storage/__init__.py | 12 +- openfisca_core/data_storage/_arrays.py | 6 +- openfisca_core/data_storage/_enums.py | 2 +- openfisca_core/data_storage/_files.py | 2 +- openfisca_core/data_storage/_funcs.py | 2 +- .../data_storage/in_memory_storage.py | 22 ++- .../data_storage/on_disk_storage.py | 151 +++++++++++++++--- 7 files changed, 159 insertions(+), 38 deletions(-) diff --git a/openfisca_core/data_storage/__init__.py b/openfisca_core/data_storage/__init__.py index fac44a81c..16f6962bf 100644 --- a/openfisca_core/data_storage/__init__.py +++ b/openfisca_core/data_storage/__init__.py @@ -1,16 +1,16 @@ """Transitional imports to ensure non-breaking changes. -Could be deprecated in the next major release. +These imports could be deprecated in the next major release. -How imports are being used today:: +Currently, imports are used in the following way:: from openfisca_core.module import symbol -The previous example provokes cyclic dependency problems -that prevent us from modularizing the different components -of the library so to make them easier to test and to maintain. +This example causes cyclic dependency problems, which prevent us from +modularising the different components of the library and making them easier to +test and maintain. 
-How could them be used after the next major release:: +After the next major release, imports could be used in the following way:: from openfisca_core import module module.symbol() diff --git a/openfisca_core/data_storage/_arrays.py b/openfisca_core/data_storage/_arrays.py index 23784fb3d..24d02f5c0 100644 --- a/openfisca_core/data_storage/_arrays.py +++ b/openfisca_core/data_storage/_arrays.py @@ -8,8 +8,6 @@ from openfisca_core import types -CalculatedVector = numpy.ndarray - class Arrays(collections.UserDict): """Dictionary of calculated vectors by period. @@ -24,8 +22,8 @@ class Arrays(collections.UserDict): >>> Arrays({period: vector}) {Period(('year', Instant((2023, 1, 1)), 1)): array([1])} - .. versionadded:: 36.0.1 + .. versionadded:: 37.1.0 """ - data: Dict[types.Period, CalculatedVector] + data: Dict[types.Period, numpy.ndarray] diff --git a/openfisca_core/data_storage/_enums.py b/openfisca_core/data_storage/_enums.py index 9b98f7239..2eb08b3b1 100644 --- a/openfisca_core/data_storage/_enums.py +++ b/openfisca_core/data_storage/_enums.py @@ -26,7 +26,7 @@ class Enums(collections.UserDict): >>> Enums({path: possible_values}) {'path/to/file.py': (, )} - .. versionadded:: 36.0.1 + .. versionadded:: 37.1.0 """ diff --git a/openfisca_core/data_storage/_files.py b/openfisca_core/data_storage/_files.py index 365062553..59de72922 100644 --- a/openfisca_core/data_storage/_files.py +++ b/openfisca_core/data_storage/_files.py @@ -22,7 +22,7 @@ class Files(collections.UserDict): >>> Files({period: path}) {Period(('year', Instant((2023, 1, 1)), 1)): 'path/to/file.py'} - .. versionadded:: 36.0.1 + .. 
versionadded:: 37.1.0 """ diff --git a/openfisca_core/data_storage/_funcs.py b/openfisca_core/data_storage/_funcs.py index aee8c7331..e4cd2be66 100644 --- a/openfisca_core/data_storage/_funcs.py +++ b/openfisca_core/data_storage/_funcs.py @@ -24,7 +24,7 @@ def parse_period(value: types.Period, eternity: bool) -> types.Period: >>> parse_period(period, False) Period(('year', Instant((2017, 1, 1)), 1)) - .. versionadded:: 36.0.1 + .. versionadded:: 37.1.0 """ diff --git a/openfisca_core/data_storage/in_memory_storage.py b/openfisca_core/data_storage/in_memory_storage.py index d6c64cb03..8309118e5 100644 --- a/openfisca_core/data_storage/in_memory_storage.py +++ b/openfisca_core/data_storage/in_memory_storage.py @@ -15,10 +15,10 @@ class InMemoryStorage: Attributes: _arrays: A dictionary containing data that has been stored in memory. - is_eternal: A boolean indicating whether the storage is eternal. + is_eternal: Flag indicating if the storage is of period eternity. Args: - is_eternal: A boolean indicating whether the storage is eternal. + is_eternal: Flag indicating if the storage is of period eternity. 
""" @@ -72,6 +72,9 @@ def put(self, value: numpy.ndarray, period: types.Period) -> None: >>> storage.put(value, period) + >>> storage.get(period) + array([1, 2, 3]) + """ period = _funcs.parse_period(period, self.is_eternal) @@ -99,6 +102,12 @@ def delete(self, period: Optional[types.Period] = None) -> None: >>> storage.get(period) + >>> storage.put(value, period) + + >>> storage.delete() + + >>> storage.get(period) + """ if period is None: @@ -108,9 +117,9 @@ def delete(self, period: Optional[types.Period] = None) -> None: period = _funcs.parse_period(period, self.is_eternal) self._arrays = Arrays({ - period_item: value - for period_item, value in self._arrays.items() - if not period.contains(period_item) + key: value + for key, value in self._arrays.items() + if not period.contains(key) }) def get_known_periods(self) -> Sequence[types.Period]: @@ -139,10 +148,11 @@ def get_memory_usage(self) -> types.MemoryUsage: """Memory usage of the storage. Returns: - A dictionary representing the memory usage. + A dictionary representing the storage's memory usage. Examples: >>> storage = InMemoryStorage() + >>> storage.get_memory_usage() {'cell_size': nan, 'nb_arrays': 0, 'total_nb_bytes': 0} diff --git a/openfisca_core/data_storage/on_disk_storage.py b/openfisca_core/data_storage/on_disk_storage.py index 4e973f57d..2c2fa74a7 100644 --- a/openfisca_core/data_storage/on_disk_storage.py +++ b/openfisca_core/data_storage/on_disk_storage.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, NoReturn, Optional, Sequence +from typing import Any, NoReturn, Optional, Sequence, Union import os import pathlib @@ -19,21 +19,21 @@ class OnDiskStorage: """Class responsible for storing/retrieving vectors on/from disk. Attributes: - _enums: ? - _files: ? - is_eternal: ? + _enums: Mapping of file paths to possible Enum values. + _files: Mapping of periods to file paths for stored vectors. + is_eternal: Flag indicating if the storage is of period eternity. 
+ preserve_storage_dir: Flag indicating if folders should be preserved. storage_dir: Path to store calculated vectors. - preserve_storage_dir: ? Args: storage_dir: Path to store calculated vectors. - is_eternal: ? - preserve_storage_dir: ? + is_eternal: Flag indicating if the storage is of period eternity. + preserve_storage_dir: Flag indicating if folders should be preserved. """ - _enums: Enums - _files: Files + _enums: Enums = Enums({}) + _files: Files = Files({}) is_eternal: bool storage_dir: str preserve_storage_dir: bool @@ -44,13 +44,44 @@ def __init__( is_eternal: bool = False, preserve_storage_dir: bool = False, ) -> None: - self._enums = Enums({}) - self._files = Files({}) self.is_eternal = is_eternal self.storage_dir = storage_dir self.preserve_storage_dir = preserve_storage_dir def _decode_file(self, file: str) -> Any: + """Decode a file by loading its contents as a NumPy array. + + If the file is associated with Enum values, the array is converted back + to an EnumArray object. + + Args: + file: Path to the file to be decoded. + + Returns: + NumPy array or EnumArray object representing the data in the file. + + Examples: + >>> import tempfile + + >>> class Housing(enums.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + ... FREE_LODGER = "Free lodger" + ... HOMELESS = "Homeless" + + >>> array = numpy.array([1]) + >>> value = enums.EnumArray(array, Housing) + >>> instant = periods.Instant((2017, 1, 1)) + >>> period = periods.Period(("year", instant, 1)) + + >>> with tempfile.TemporaryDirectory() as storage_dir: + ... storage = OnDiskStorage(storage_dir) + ... storage.put(value, period) + ... 
storage._decode_file(storage._files[period]) + EnumArray([<Housing.TENANT: 'Tenant'>]) + + """ + enum = self._enums.get(file) load = numpy.load(file) @@ -59,7 +90,34 @@ def _decode_file(self, file: str) -> Any: return enums.EnumArray(load, enum) - def get(self, period: types.Period) -> Any: + def get( + self, + period: types.Period, + ) -> Optional[Union[numpy.ndarray, enums.EnumArray]]: + """Retrieve the data for the specified period from disk. + + Args: + period: The period for which data should be retrieved. + + Returns: + A NumPy array or EnumArray object representing the vector for the + specified period, or None if no vector is stored for that period. + + Examples: + >>> import tempfile + + >>> value = numpy.array([1, 2, 3]) + >>> instant = periods.Instant((2017, 1, 1)) + >>> period = periods.Period(("year", instant, 1)) + + >>> with tempfile.TemporaryDirectory() as storage_dir: + ... storage = OnDiskStorage(storage_dir) + ... storage.put(value, period) + ... storage.get(period) + array([1, 2, 3]) + + """ + period = _funcs.parse_period(period, self.is_eternal) values = self._files.get(period) @@ -69,6 +127,27 @@ def get(self, period: types.Period) -> Any: return self._decode_file(values) def put(self, value: Any, period: types.Period) -> None: + """Store the specified data on disk for the specified period. + + Args: + value: The data to store. + period: The period for which the data should be stored. + + Examples: + >>> import tempfile + + >>> value = numpy.array([1, 2, 3]) + >>> instant = periods.Instant((2017, 1, 1)) + >>> period = periods.Period(("year", instant, 1)) + + >>> with tempfile.TemporaryDirectory() as storage_dir: + ... storage = OnDiskStorage(storage_dir) + ... storage.put(value, period) + ... 
storage.get(period) + array([1, 2, 3]) + + """ + period = _funcs.parse_period(period, self.is_eternal) stem = str(period) path = os.path.join(self.storage_dir, f"{stem}.npy") @@ -81,6 +160,39 @@ def put(self, value: Any, period: types.Period) -> None: self._files = Files({period: path, **self._files}) def delete(self, period: Optional[types.Period] = None) -> None: + """Delete the data for the specified period from disk. + + Args: + period: The period for which data should be deleted. If not + specified, all data will be deleted. + + Examples: + >>> import tempfile + + >>> value = numpy.array([1, 2, 3]) + >>> instant = periods.Instant((2017, 1, 1)) + >>> period = periods.Period(("year", instant, 1)) + + >>> with tempfile.TemporaryDirectory() as storage_dir: + ... storage = OnDiskStorage(storage_dir) + ... storage.put(value, period) + ... storage.get(period) + array([1, 2, 3]) + + >>> with tempfile.TemporaryDirectory() as storage_dir: + ... storage = OnDiskStorage(storage_dir) + ... storage.put(value, period) + ... storage.delete(period) + ... storage.get(period) + + >>> with tempfile.TemporaryDirectory() as storage_dir: + ... storage = OnDiskStorage(storage_dir) + ... storage.put(value, period) + ... storage.delete() + ... storage.get(period) + + """ + if period is None: self._files = Files({}) return None @@ -88,28 +200,29 @@ def delete(self, period: Optional[types.Period] = None) -> None: period = _funcs.parse_period(period, self.is_eternal) self._files = Files({ - period_item: value - for period_item, value in self._files.items() - if not period.contains(period_item) + key: value + for key, value in self._files.items() + if not period.contains(key) }) def get_known_periods(self) -> Sequence[types.Period]: """List of storage's known periods. Returns: - A list of periods. + A sequence containing the storage's known periods. 
Examples: >>> import tempfile + >>> instant = periods.Instant((2017, 1, 1)) + >>> period = periods.Period(("year", instant, 1)) + >>> with tempfile.TemporaryDirectory() as storage_dir: ... storage = OnDiskStorage(storage_dir) ... storage.get_known_periods() [] >>> with tempfile.TemporaryDirectory() as storage_dir: - ... instant = periods.Instant((2017, 1, 1)) - ... period = periods.Period(("year", instant, 1)) ... storage = OnDiskStorage(storage_dir) ... storage.put([], period) ... storage.get_known_periods() @@ -135,7 +248,7 @@ def get_memory_usage(self) -> NoReturn: ... NotImplementedError: Method not implemented for this storage. - .. versionadded:: 36.0.1 + .. versionadded:: 37.1.0 """