Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tabulator expansion #104

Draft
wants to merge 14 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ repos:
masci_tools/io/parsers/fleur/.*py|
masci_tools/io/parsers/fleur_schema/.*py|
masci_tools/io/parsers/hdf5/.*py|
masci_tools/io/parsers/tabulator/.*py|
masci_tools/io/io_nmmpmat.py|
masci_tools/io/io_fleurxml.py|
masci_tools/io/fleur_inpgen.py|
Expand Down
20 changes: 5 additions & 15 deletions masci_tools/io/parsers/tabulator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,12 @@
# For further information please visit http://judft.de/. #
# #
###############################################################################
#pylint: disable=undefined-variable
"""This subpackage contains a tabulator. Its purpose is to let you create a table of properties,
say, a pandas DataFrame, from any collection of similar objects, and reuse frequently used recipes.
"""
# import submodules
from . import transformers
from . import recipes
from . import tabulator
from .tabulator import *
from .recipes import *
from .transformers import *

# import most important user classes to this level
from .transformers import \
Transformer, \
TransformedValue, \
DefaultTransformer

from .recipes import \
Recipe

from .tabulator import \
Tabulator
__all__ = (tabulator.__all__ + recipes.__all__ + transformers.__all__) #type: ignore
139 changes: 72 additions & 67 deletions masci_tools/io/parsers/tabulator/recipes.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,25 @@

Recipes let you reuse tabulator settings for different use cases.
"""
from __future__ import annotations

import abc as _abc
import typing as _typing
import abc
from typing import Iterable, Any
try:
from typing import TypeAlias #type:ignore
except ImportError:
from typing_extensions import TypeAlias

import masci_tools.util.python_util as _masci_python_util
from .transformers import Transformer

KeyPaths: TypeAlias = 'list[Iterable[str]]'
PathList: TypeAlias = 'list[Iterable[str]] | dict[str,Any]'

class Recipe(_abc.ABC):
__all__ = ('Recipe',)


class Recipe(abc.ABC):
"""Recipe for a :py:class:`~.tabulator.Tabulator`.

Recipes hold the include, exclude list of properties which a tabulator should put into a table, by reading
Expand All @@ -39,7 +49,10 @@ class Recipe(_abc.ABC):
have dtype 'object' or 'float64' and the table won't fit into memory anymore very quickly.
"""

def __init__(self, exclude_list: dict = None, include_list: dict = None, transformer: Transformer = None, **kwargs):
def __init__(self,
exclude_list: PathList | None = None,
include_list: PathList | None = None,
transformer: Transformer | None = None):
"""Initialize a recipe for a :py:class:`~.tabulator.Tabulator`.

The attributes :py:attr:`~.include_list` and :py:attr:`~.exclude_list` control which properties
Expand Down Expand Up @@ -122,33 +135,40 @@ def __init__(self, exclude_list: dict = None, include_list: dict = None, transfo
:param transformer: Specifies special transformations for certain properties for tabulation.
:param kwargs: Additional keyword arguments for subclasses.
"""
# note: for the in/ex lists, using the public setter here,
# to trigger conversion
self._exclude_list = exclude_list if exclude_list else {}
self._include_list = include_list if include_list else {}
self._exclude_list: KeyPaths
self._include_list: KeyPaths
self.dtypes: dict[tuple[str, ...], type[Any]] = {}

self.transformer = transformer

self.exclude_list = exclude_list or []
self.include_list = include_list or []

@property
def exclude_list(self) -> dict:
def exclude_list(self) -> KeyPaths:
return self._exclude_list

@exclude_list.setter
def exclude_list(self, exclude_list: _typing.Union[dict, list]):
self._exclude_list = exclude_list
def exclude_list(self, exclude_list: PathList) -> None:
if isinstance(exclude_list, dict):
self._to_keypaths()
self._exclude_list, _ = self._to_keypaths(exclude_list, 'exclude')
else:
self._exclude_list = [(path,) if not isinstance(path, (tuple, list)) else path for path in exclude_list]

@property
def include_list(self) -> dict:
def include_list(self) -> KeyPaths:
return self._include_list

@include_list.setter
def include_list(self, include_list: _typing.Union[dict, list]):
self._include_list = include_list
def include_list(self, include_list: PathList) -> None:
if isinstance(include_list, dict):
self._to_keypaths()
self._include_list, dtypes = self._to_keypaths(include_list, 'include')
self.dtypes = dtypes
else:
self._include_list = [(path,) if not isinstance(path, (tuple, list)) else path for path in include_list]

def _to_keypaths(self):
@staticmethod
def _to_keypaths(path_dict: dict[str, Any], name: str) -> KeyPaths:
"""Generate paths from a possibly nested dictionary.

This method can be used for handling include lists, exclude lists, and when writing
Expand All @@ -161,58 +181,43 @@ def _to_keypaths(self):
convert to keypaths (upper: done inside this one anyway)
"""

def _to_keypaths_recursive(sub_dict: dict, path: list):
def _to_keypaths_recursive(sub_dict: dict[str, Any], path: list[str]) -> list[tuple[list[str], Any]]:
paths = []
for k, v in sub_dict.items():
if isinstance(v, dict):
paths += _to_keypaths_recursive(v, path + [k])
paths.append((path + [k], v))
paths.extend(_to_keypaths_recursive(v, path + [k]))
else:
paths.append((path + [k], v))
return paths

for in_or_ex, a_dict in {'in': self._include_list, 'out': self._exclude_list}.items():

# precondition: not already keypaths format
is_list = isinstance(a_dict, list)
is_all_lists = is_list and all(isinstance(path, list) for path in a_dict)
if is_all_lists:
continue

# if empty, convert to empty list. if not empty, convert to keypaths
if not a_dict:
keypaths = []
else:
# convert the include list from with-list format to with-none format:
# same-level subkeys mentioned as list [k1,k2] -> dict {k1:None, k2:None}.
_a_dict = _masci_python_util.modify_dict(a_dict=a_dict,
transform_value=lambda v: {k: None for k in v}
if isinstance(v, list) else v,
to_level=99)

keypaths = _to_keypaths_recursive(sub_dict=_a_dict, path=[])
# the result consists of sets of subpaths. For each subset, there is
# an additional entry where the value contains the whole subdict from
# which the paths were generated. We are not interested in those duplicate
# entries, so remove them.
keypaths = [tup for tup in keypaths if not isinstance(tup[1], dict)]

# now list should be like [(path1, None), (path2, None), ...],
# or at least of type _typing.List[_typing.Tuple[list, _typing.Any]].
# check that. if not, something is wrong.
# otherwise, just return the paths.
if all(tup[1] is None for tup in keypaths):
keypaths = [tup[0] for tup in keypaths]

# postcondition: keypaths format
is_list = isinstance(keypaths, list)
is_all_lists = is_list and all(isinstance(path, list) for path in keypaths)
if not is_all_lists:
raise TypeError(f'Could not generate keypaths of required type {_typing.List[list]} '
f'from {in_or_ex}clude list. Either specified list in wrong format '
f'(see class init docstring for examples), or list generated from '
f'autolist stumbled over untreated special case for some unpacked '
f'property.')

if in_or_ex == 'in':
self._include_list = keypaths
elif in_or_ex == 'out':
self._exclude_list = keypaths
# if empty, convert to empty list. if not empty, convert to keypaths
if not path_dict:
return []

# convert the include list from with-list format to with-none format:
# same-level subkeys mentioned as list [k1,k2] -> dict {k1:None, k2:None}.
_a_dict = _masci_python_util.modify_dict(a_dict=path_dict,
transform_value=lambda v: {k: None for k in v}
if isinstance(v, list) else v,
to_level=99)

keypaths = _to_keypaths_recursive(sub_dict=_a_dict, path=[])

# now list should be like [(path1, None), (path2, None), ...],
# or at least of type _typing.List[_typing.Tuple[list, _typing.Any]].
# check that. if not, something is wrong.
# otherwise, just return the paths.
datatypes = {tuple(path): dtype for path, dtype in keypaths if dtype is not None}
keypaths = [tuple(path) for path, dtype in keypaths] #type:ignore

# postcondition: keypaths format
is_list = isinstance(keypaths, list)
is_all_lists = is_list and all(isinstance(path, tuple) for path in keypaths)
if not is_all_lists:
raise TypeError(f'Could not generate keypaths of required type list of lists '
f'from {name} list. Either specified list in wrong format '
f'(see class init docstring for examples), or list generated from '
f'autolist stumbled over untreated special case for some unpacked '
f'property.')

return keypaths, datatypes
Loading