Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Various small improvements and fixes of pytools code; completion of docstrings #99

Merged
merged 21 commits into from
Nov 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
088cf49
remove explicit reference to deprecated pandas Panel in is_list_like()
j-ittner Nov 27, 2020
1c9fe43
fix type hinting of deprecated decorator
j-ittner Nov 27, 2020
2fe1b40
fix type hinting of function data.sim_data()
j-ittner Nov 27, 2020
940e1f5
add new exception NotFittedError, raised when an object is not fitted
j-ittner Nov 27, 2020
d942bc5
add new package pytools.meta
j-ittner Nov 27, 2020
8e6f48e
move function text_contrast_color to pytools.viz.colors
j-ittner Nov 27, 2020
84212c3
make PercentageFormatter a singleton class
j-ittner Nov 27, 2020
3e12c96
fix type hinting of TextStyle.__init__()
j-ittner Nov 27, 2020
43f536d
move function text_contrast_color to pytools.viz.colors (2)
j-ittner Nov 27, 2020
6cf3f6f
fix handling of unsupported fields in PercentageMatrixMatplotStyle
j-ittner Nov 27, 2020
6fd2db2
fix type hinting of MatrixDrawer.__init__()
j-ittner Nov 27, 2020
ca9f3e3
move function text_contrast_color to pytools.viz.colors (3)
j-ittner Nov 27, 2020
8fdad4a
fix formatting of percentages >= 99.5% in DendrogramHeatmapStyle
j-ittner Nov 27, 2020
83ff5fe
make PercentageFormatter a singleton class (2)
j-ittner Nov 27, 2020
1c6da79
rename …_name attributes of linkage tree to …_label
j-ittner Nov 27, 2020
447ccc0
rename _get_style_attributes to _get_style_kwargs
j-ittner Nov 27, 2020
b1e45c2
fix type hinting of DendrogramDrawer.__init__()
j-ittner Nov 27, 2020
8c17330
improve node validation in LinkageTree.children()
j-ittner Nov 27, 2020
70f2bbf
fix references to class DrawingStyle (instead of the old DrawStyle)
j-ittner Nov 27, 2020
9db620c
rename _get_style_attributes to _get_style_kwargs (2)
j-ittner Nov 27, 2020
4d777a9
improve and complete pytools docstrings
j-ittner Nov 27, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 56 additions & 51 deletions src/pytools/api/_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,17 @@ class AllTracker:
"""

def __init__(self, globals_: Dict[str, Any]):
"""
:param globals_: the dictionary of global variables returned by calling
:func:`globals` in the current module scope
"""
self.globals_ = globals_
self.imported = set(globals_.keys())

def validate(self) -> None:
"""
Validate that all eligible symbols defined since creation of this tracker
are listed in the ``__all__`` field.
are listed in the ``__all__`` variable.

:raise RuntimeError: if ``__all__`` is not as expected
"""
Expand Down Expand Up @@ -97,15 +101,13 @@ def is_list_like(obj: Any) -> bool:
``__getitem__``. These include, for example, lists, tuples, sets, NumPy arrays, and
Pandas series and indices.

As an exception, the following types are not considered list-like despite
implementing the methods above:
As an exception, the following types are not considered list-like:

- :class:`str`
- :class:`bytes`
- :class:`pandas.DataFrame`: inconsistent behaviour of the sequence interface; \
iterating a data frame yields the values of the column index, while the length \
of a data frame is its number of rows
- :class:`pandas.Panel`: similar behaviour as for data frames
- :class:`numpy.ndarray` instances with 0 dimensions

:param obj: The object to check
Expand All @@ -115,7 +117,9 @@ def is_list_like(obj: Any) -> bool:
return (
hasattr(obj, "__len__")
and hasattr(obj, "__getitem__")
and not isinstance(obj, (str, bytes, pd.DataFrame, pd.Panel))
and not isinstance(obj, (str, bytes))
# pandas data objects with more than 1 dimension, e.g., data frames
and not (isinstance(obj, pd.NDFrame) and obj.ndim != 1)
# exclude zero-dimensional numpy arrays, effectively scalars
and not (isinstance(obj, np.ndarray) and obj.ndim == 0)
)
Expand All @@ -130,20 +134,18 @@ def to_tuple(
"""
Return the given values as a tuple.

- if arg values is a tuple, return arg values unchanged
- if arg values is an iterable and is an instance of the expected type,
return a tuple with the value as its only element
- if arg values is an iterable and is not an instance of the expected type,
return a tuple of its elements
- if arg values is not an iterable,
return a tuple with the value as its only element
- if arg `values` is a tuple, return arg `values` unchanged
- if arg `values` is an iterable other than a tuple, return a tuple of its elements
- if arg `values` is not an iterable, return a tuple with the value as its only
element

:param values: one or more elements to return as a tuple
:param element_type: expected type of the values, raise a TypeException if one \
:param element_type: expected type of the values; raise a ``TypeException`` if one
or more values do not implement this type
:param arg_name: name of the argument when calling this to process a function or \
initializer argument. Used to construct exception messages. (optional)
:param arg_name: name of the argument as which the values were passed to a function
or method; used when composing the ``TypeException`` message
:return: the values as a tuple
:raise TypeException: one or more values did not match the expected type
"""

return _to_collection(
Expand All @@ -163,20 +165,18 @@ def to_list(
"""
Return the given values as a list.

- if arg values is a list, return arg values unchanged
- if arg values is an iterable and is an instance of the expected type,
return a list with the value as its only element
- if arg values is an iterable and is not an instance of the expected type,
return a list of its elements
- if arg values is not an iterable,
return a list with the value as its only element
- if arg `values` is a list, return arg `values` unchanged
- if arg `values` is an iterable other than a list, return a list of its elements
- if arg `values` is not an iterable, return a list with the value as its only
element

:param values: one or more elements to return as a list
:param element_type: expected type of the values, raise a TypeException if one \
:param element_type: expected type of the values; raise a ``TypeException`` if one
or more values do not implement this type
:param arg_name: name of the argument when calling this to process a function or \
initializer argument. Used to construct exception messages. (optional)
:param arg_name: name of the argument as which the values were passed to a function
or method; used when composing the ``TypeException`` message
:return: the values as a list
:raise TypeException: one or more values did not match the expected type
"""

return _to_collection(
Expand All @@ -196,20 +196,18 @@ def to_set(
"""
Return the given values as a set.

- if arg values is a set, return arg values unchanged
- if arg values is an iterable and is an instance of the expected type,
return a set with the value as its only element
- if arg values is an iterable and is not an instance of the expected type,
return a set of its elements
- if arg values is not an iterable,
return a set with the value as its only element
- if arg `values` is a set, return arg `values` unchanged
- if arg `values` is an iterable other than a set, return a set of its elements
- if arg `values` is not an iterable, return a set with the value as its only
element

:param values: one or more elements to return as a set
:param element_type: expected type of the values, raise a TypeException if one \
:param element_type: expected type of the values; raise a ``TypeException`` if one
or more values do not implement this type
:param arg_name: name of the argument when calling this to process a function or \
initializer argument. Used to construct exception messages. (optional)
:param arg_name: name of the argument as which the values were passed to a function
or method; used when composing the ``TypeException`` message
:return: the values as a set
:raise TypeException: one or more values did not match the expected type
"""

return _to_collection(
Expand Down Expand Up @@ -264,11 +262,11 @@ def validate_type(

:param value: an arbitrary object
:param expected_type: the type to check for
:param optional: if ``True``, accept ``None`` as a valid value \
(default: ``False``)
:param name: optional name of the entity to which the elements were passed. \
Use `"arg …"` for arguments, or the name of a class if verifying unnamed \
arguments.
:param optional: if ``True``, accept ``None`` as a valid value (default: ``False``)
:param name: optional name of the argument or callable with/to which the value
was passed; use ``"arg …"`` for arguments, or the name of a callable if
verifying positional arguments
:raise TypeException: the value did not match the expected type
"""
if expected_type == object:
return
Expand All @@ -291,13 +289,15 @@ def validate_element_types(
iterable: Iterable[T], *, expected_type: Type[T], name: Optional[str] = None
) -> None:
"""
Validate that all elements in the given iterable implement the expected type
Validate that all elements in the given iterable implement the expected type.

:param iterable: an iterable
:param expected_type: the type to check for
:param name: optional name of the entity to which the elements were passed. \
Use `"arg …"` for arguments, or the name of a class if verifying unnamed \
arguments.
:param name: optional name of the argument or callable with/to which the elements
were passed; use ``"arg …"`` for arguments, or the name of a callable if
verifying positional arguments
:raise TypeException: one or more elements of the iterable did not match the
expected type
"""
if expected_type == object:
return
Expand All @@ -316,11 +316,13 @@ def validate_element_types(

def get_generic_bases(cls: type) -> Tuple[type, ...]:
"""
Bugfix version of :func:`typing_inspect.get_generic_bases` that prevents
getting the generic bases of the parent class if not defined for the given class.
Bugfix version of :func:`typing_inspect.get_generic_bases`.

Prevents getting the generic bases of the parent class if not defined for the given
class.

:param cls: class to get the generic bases for
:return: the resulting generic base classes
:return: the generic base classes of the given class
"""
bases = typing_inspect.get_generic_bases(cls)
if bases is typing_inspect.get_generic_bases(super(cls, cls)):
Expand All @@ -334,14 +336,17 @@ def get_generic_bases(cls: type) -> Tuple[type, ...]:
#


def deprecated(function: Callable = None, *, message: str = None):
def deprecated(function: Callable = None, *, message: Optional[str] = None):
"""
Decorator to mark functions as deprecated.
Decorator to mark a function as deprecated.

It will result in a warning being logged when the function is used.
Logs a warning when the decorated function is called.

To deprecate classes, apply this decorator to the ``__init__`` method, not to the
class itself.

:param function: the function to be decorated
:param message: custom message to include when logging the warning (optional)
"""

def _deprecated_inner(func: callable) -> callable:
Expand Down Expand Up @@ -381,7 +386,7 @@ def deprecation_warning(message: str, stacklevel: int = 1) -> None:
Issue a deprecation warning.

:param message: the warning message
:param stacklevel: stack level relative to caller for emitting the context of the \
:param stacklevel: stack level relative to caller for emitting the context of the
warning (default: 1)
:return:
"""
Expand Down
45 changes: 23 additions & 22 deletions src/pytools/data/_simulation.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Utilities for creating simulated data sets.
"""
from typing import Sequence
from typing import Optional, Sequence

import numpy as np
import pandas as pd
Expand All @@ -18,9 +18,9 @@
def sim_data(
n: int = 100,
intercept: float = -5,
two_way_coef: float = None,
two_way_coef: Optional[float] = None,
linear_vars: int = 10,
linear_var_coef: Sequence[float] = None,
linear_var_coef: Optional[Sequence[float]] = None,
noise_vars: int = 0,
corr_vars: int = 0,
corr_type: str = "AR1",
Expand All @@ -29,52 +29,53 @@ def sim_data(
bin_var_p: float = 0,
bin_coef: float = 0,
outcome: str = "classification",
regression_err: float = None,
regression_err: Optional[float] = None,
seed_val: int = 4763546,
):
"""
Simulate data for classification or regression that includes an interaction between
two linear features, and some non-linear and linear features. Noise variables,
correlated variables that are not predictive and surrogate features
two linear features, and some non-linear and linear features.

Noise variables, correlated variables that are not predictive, and surrogate features
that are merely derived from predictive features are also added.

This function is for the most part a direct translation of the twoClassSim function
from the R package caret - the option for an ordinal outcome and binary outcome
mis-labelling were omitted. Full credit for the approach used for simulating binary
classification data goes to the Authors and contributors of caret
(Caret: Kuhn, M. (2008). Caret package. Journal of Statistical Software, 28(5).
https://rdrr.io/cran/caret/man/twoClassSim.html)
This function is for the most part a direct translation of the ``twoClassSim``
function from the R package caret -- the option for an ordinal outcome and binary
outcome mis-labelling were omitted. Full credit for the approach used for simulating
binary classification data goes to the authors and contributors of caret
[`Kuhn, M. (2008). Caret package. Journal of Statistical Software, 28(5).
<https://rdrr.io/cran/caret/man/twoClassSim.html>`_]

Key modifications compared to the R implementation:
Key modifications compared to the *R* implementation:

1. The ordinal outcome option has not been translated
2. Mis-labelling of the binary outcome has not been translated
3. A linear feature that is a copy of another used in the linear \
predictor with a small amount of noise has been added to allow for the study \
of variable surrogacy/redundancy
4. Option for a binary predictor and surrogate has also been added
5. Toggle option for regression versus classification has also been added
4. Option for a binary predictor and surrogate has been added
5. Toggle option for regression versus classification has been added
6. Arguments for the coefficients of primary predictors of interest have been added

:param n: number of observations
:param intercept: value for the intercept which can be modified to generate class \
:param intercept: value for the intercept which can be modified to generate class
imbalance
:param two_way_coef: list of three coefficients: two linear terms and an \
:param two_way_coef: list of three coefficients: two linear terms and an
interaction effect
:param linear_vars: number of linear features
:param linear_var_coef: an optional list of coefficients for linear features if \
:param linear_var_coef: an optional list of coefficients for linear features if
the default is not desired
:param noise_vars: number of unrelated independent noise features (do not \
:param noise_vars: number of unrelated independent noise features (do not
contribute to the linear predictor)
:param corr_vars: number of unrelated correlated noise features (do not contribute \
:param corr_vars: number of unrelated correlated noise features (do not contribute
to the linear predictor)
:param corr_type: type of correlation (exchangeable or auto-regressive) for \
:param corr_type: type of correlation (exchangeable or auto-regressive) for
correlated noise features
:param corr_value: correlation for correlated noise features
:param surg_err: degree of noise added to first linear predictor
:param bin_var_p: prevalence for a binary feature to include in linear predictor
:param bin_coef: coefficient for the impact of binary feature on linear predictor
:param outcome: can be either classification for a binary outcome or regression \
:param outcome: can be either classification for a binary outcome or regression
for a continuous outcome
:param regression_err: the error to be used in simulating a regression outcome
:param seed_val: set a seed for reproducibility
Expand Down
24 changes: 19 additions & 5 deletions src/pytools/fit/_fit.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# Exported names
#

__all__ = ["FittableMixin"]
__all__ = ["NotFittedError", "FittableMixin"]


#
Expand All @@ -36,6 +36,12 @@
#


class NotFittedError(Exception):
"""
Raised when a fittable object was expected to be fitted but was not fitted.
"""


class FittableMixin(Generic[T_Data], metaclass=ABCMeta):
"""
Mix-in class that supports fitting the object to data.
Expand All @@ -44,7 +50,8 @@ class FittableMixin(Generic[T_Data], metaclass=ABCMeta):
@abstractmethod
def fit(self: T, _x: T_Data, **fit_params) -> T:
"""
Fit this object to the given data
Fit this object to the given data.

:param _x: the data to fit this object to
:param fit_params: optional fitting parameters
:return: self
Expand All @@ -54,13 +61,20 @@ def fit(self: T, _x: T_Data, **fit_params) -> T:
@property
@abstractmethod
def is_fitted(self) -> bool:
"""``True`` if this object is fitted, ``False`` otherwise."""
"""
``True`` if this object is fitted, ``False`` otherwise.
"""
pass

def _ensure_fitted(self) -> None:
# raise a runtime exception if this object is not fitted
"""
Raise a :class:`.NotFittedError` if this object is not fitted.

:meta public:
:raise NotFittedError: this object is not fitted
"""
if not self.is_fitted:
raise RuntimeError(f"{type(self).__name__} is not fitted")
raise NotFittedError(f"{type(self).__name__} is not fitted")


__tracker.validate()
5 changes: 5 additions & 0 deletions src/pytools/meta/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""
Useful meta-classes.
"""

from ._meta import *
Loading