Skip to content

Commit

Permalink
Complete OnDiskStorage doc
Browse files Browse the repository at this point in the history
  • Loading branch information
bonjourmauko committed Dec 9, 2022
1 parent 1b17e1c commit df7e6b4
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 30 deletions.
12 changes: 6 additions & 6 deletions openfisca_core/data_storage/__init__.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
"""Transitional imports to ensure non-breaking changes.
Could be deprecated in the next major release.
These imports could be deprecated in the next major release.
How imports are being used today::
Currently, imports are used in the following way::
from openfisca_core.module import symbol
The previous example provokes cyclic dependency problems
that prevent us from modularizing the different components
of the library so to make them easier to test and to maintain.
This example causes cyclic dependency problems, which prevent us from
modularising the different components of the library and making them easier to
test and maintain.
How could them be used after the next major release::
After the next major release, imports could be used in the following way::
from openfisca_core import module
module.symbol()
Expand Down
22 changes: 16 additions & 6 deletions openfisca_core/data_storage/in_memory_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ class InMemoryStorage:
Attributes:
_arrays: A dictionary containing data that has been stored in memory.
is_eternal: A boolean indicating whether the storage is eternal.
is_eternal: Flag indicating if the storage is of period eternity.
Args:
is_eternal: A boolean indicating whether the storage is eternal.
is_eternal: Flag indicating if the storage is of period eternity.
"""

Expand Down Expand Up @@ -72,6 +72,9 @@ def put(self, value: numpy.ndarray, period: types.Period) -> None:
>>> storage.put(value, period)
>>> storage.get(period)
array([1, 2, 3])
"""

period = _funcs.parse_period(period, self.is_eternal)
Expand Down Expand Up @@ -99,6 +102,12 @@ def delete(self, period: Optional[types.Period] = None) -> None:
>>> storage.get(period)
>>> storage.put(value, period)
>>> storage.delete()
>>> storage.get(period)
"""

if period is None:
Expand All @@ -108,9 +117,9 @@ def delete(self, period: Optional[types.Period] = None) -> None:
period = _funcs.parse_period(period, self.is_eternal)

self._arrays = Arrays({
period_item: value
for period_item, value in self._arrays.items()
if not period.contains(period_item)
key: value
for key, value in self._arrays.items()
if not period.contains(key)
})

def get_known_periods(self) -> Sequence[types.Period]:
Expand Down Expand Up @@ -139,10 +148,11 @@ def get_memory_usage(self) -> types.MemoryUsage:
"""Memory usage of the storage.
Returns:
A dictionary representing the memory usage.
A dictionary representing the storage's memory usage.
Examples:
>>> storage = InMemoryStorage()
>>> storage.get_memory_usage()
{'cell_size': nan, 'nb_arrays': 0, 'total_nb_bytes': 0}
Expand Down
149 changes: 131 additions & 18 deletions openfisca_core/data_storage/on_disk_storage.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import Any, NoReturn, Optional, Sequence
from typing import Any, NoReturn, Optional, Sequence, Union

import os
import pathlib
Expand All @@ -19,21 +19,21 @@ class OnDiskStorage:
"""Class responsible for storing/retrieving vectors on/from disk.
Attributes:
_enums: ?
_files: ?
is_eternal: ?
_enums: Mapping of file paths to possible Enum values.
_files: Mapping of periods to file paths for stored vectors.
is_eternal: Flag indicating if the storage is of period eternity.
preserve_storage_dir: Flag indicating if folders should be preserved.
storage_dir: Path to store calculated vectors.
preserve_storage_dir: ?
Args:
storage_dir: Path to store calculated vectors.
is_eternal: ?
preserve_storage_dir: ?
is_eternal: Flag indicating if the storage is of period eternity.
preserve_storage_dir: Flag indicating if folders should be preserved.
"""

_enums: Enums
_files: Files
_enums: Enums = Enums({})
_files: Files = Files({})
is_eternal: bool
storage_dir: str
preserve_storage_dir: bool
Expand All @@ -44,13 +44,44 @@ def __init__(
is_eternal: bool = False,
preserve_storage_dir: bool = False,
) -> None:
self._enums = Enums({})
self._files = Files({})
self.is_eternal = is_eternal
self.storage_dir = storage_dir
self.preserve_storage_dir = preserve_storage_dir

def _decode_file(self, file: str) -> Any:
"""Decodes a file by loading its contents as a NumPy array.
If the file is associated with Enum values, the array is converted back
to an EnumArray object.
Args:
file: Path to the file to be decoded.
Returns:
NumPy array or EnumArray object representing the data in the file.
Examples:
>>> import tempfile
>>> class Housing(enums.Enum):
... OWNER = "Owner"
... TENANT = "Tenant"
... FREE_LODGER = "Free lodger"
... HOMELESS = "Homeless"
>>> array = numpy.array([1])
>>> value = enums.EnumArray(array, Housing)
>>> instant = periods.Instant((2017, 1, 1))
>>> period = periods.Period(("year", instant, 1))
>>> with tempfile.TemporaryDirectory() as storage_dir:
... storage = OnDiskStorage(storage_dir)
... storage.put(value, period)
... storage._decode_file(storage._files[period])
EnumArray([<Housing.TENANT: 'Tenant'>])
"""

enum = self._enums.get(file)
load = numpy.load(file)

Expand All @@ -59,7 +90,34 @@ def _decode_file(self, file: str) -> Any:

return enums.EnumArray(load, enum)

def get(self, period: types.Period) -> Any:
def get(
self,
period: types.Period,
) -> Optional[Union[numpy.ndarray, enums.EnumArray]]:
"""Retrieve the data for the specified period from disk.
Args:
period: The period for which data should be retrieved.
Returns:
A NumPy array or EnumArray object representing the vector for the
specified period, or None if no vector is stored for that period.
Examples:
>>> import tempfile
>>> value = numpy.array([1, 2, 3])
>>> instant = periods.Instant((2017, 1, 1))
>>> period = periods.Period(("year", instant, 1))
>>> with tempfile.TemporaryDirectory() as storage_dir:
... storage = OnDiskStorage(storage_dir)
... storage.put(value, period)
... storage.get(period)
array([1, 2, 3])
"""

period = _funcs.parse_period(period, self.is_eternal)
values = self._files.get(period)

Expand All @@ -69,6 +127,27 @@ def get(self, period: types.Period) -> Any:
return self._decode_file(values)

def put(self, value: Any, period: types.Period) -> None:
"""Store the specified data on disk for the specified period.
Args:
value: The data to store.
period: The period for which the data should be stored.
Examples:
>>> import tempfile
>>> value = numpy.array([1, 2, 3])
>>> instant = periods.Instant((2017, 1, 1))
>>> period = periods.Period(("year", instant, 1))
>>> with tempfile.TemporaryDirectory() as storage_dir:
... storage = OnDiskStorage(storage_dir)
... storage.put(value, period)
... storage.get(period)
array([1, 2, 3])
"""

period = _funcs.parse_period(period, self.is_eternal)
stem = str(period)
path = os.path.join(self.storage_dir, f"{stem}.npy")
Expand All @@ -81,35 +160,69 @@ def put(self, value: Any, period: types.Period) -> None:
self._files = Files({period: path, **self._files})

def delete(self, period: Optional[types.Period] = None) -> None:
"""Delete the data for the specified period from disk.
Args:
period: The period for which data should be deleted. If not
specified, all data will be deleted.
Examples:
>>> import tempfile
>>> value = numpy.array([1, 2, 3])
>>> instant = periods.Instant((2017, 1, 1))
>>> period = periods.Period(("year", instant, 1))
>>> with tempfile.TemporaryDirectory() as storage_dir:
... storage = OnDiskStorage(storage_dir)
... storage.put(value, period)
... storage.get(period)
array([1, 2, 3])
>>> with tempfile.TemporaryDirectory() as storage_dir:
... storage = OnDiskStorage(storage_dir)
... storage.put(value, period)
... storage.delete(period)
... storage.get(period)
>>> with tempfile.TemporaryDirectory() as storage_dir:
... storage = OnDiskStorage(storage_dir)
... storage.put(value, period)
... storage.delete()
... storage.get(period)
"""

if period is None:
self._files = Files({})
return None

period = _funcs.parse_period(period, self.is_eternal)

self._files = Files({
period_item: value
for period_item, value in self._files.items()
if not period.contains(period_item)
key: value
for key, value in self._files.items()
if not period.contains(key)
})

def get_known_periods(self) -> Sequence[types.Period]:
"""List of storage's known periods.
Returns:
A list of periods.
A sequence containing the storage's known periods.
Examples:
>>> import tempfile
>>> instant = periods.Instant((2017, 1, 1))
>>> period = periods.Period(("year", instant, 1))
>>> with tempfile.TemporaryDirectory() as storage_dir:
... storage = OnDiskStorage(storage_dir)
... storage.get_known_periods()
[]
>>> with tempfile.TemporaryDirectory() as storage_dir:
... instant = periods.Instant((2017, 1, 1))
... period = periods.Period(("year", instant, 1))
... storage = OnDiskStorage(storage_dir)
... storage.put([], period)
... storage.get_known_periods()
Expand Down

0 comments on commit df7e6b4

Please sign in to comment.