Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Implements sklearn style pretty printing directly in skbase #150

Closed
wants to merge 18 commits into from
Closed
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions docs/source/api_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,27 @@ Base Classes
BaseObject
BaseEstimator

.. _global_config:

Configure ``skbase``
====================

.. automodule:: skbase.config
:no-members:
:no-inherited-members:

.. currentmodule:: skbase.config

.. autosummary::
:toctree: api_reference/auto_generated/
:template: function.rst

get_config
get_default_config
set_config
reset_config
config_context

.. _obj_retrieval:

Object Retrieval
Expand Down
15 changes: 15 additions & 0 deletions docs/source/user_documentation/user_guide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ that ``skbase`` provides, see the :ref:`api_ref`.
user_guide/lookup
user_guide/validate
user_guide/testing
user_guide/configuration


.. grid:: 1 2 2 2
Expand Down Expand Up @@ -103,3 +104,17 @@ that ``skbase`` provides, see the :ref:`api_ref`.
:expand:

Testing

.. grid-item-card:: Configuration
:text-align: center

Configure ``skbase``.

+++

.. button-ref:: user_guide/configuration
:color: primary
:click-parent:
:expand:

Configuration
11 changes: 11 additions & 0 deletions docs/source/user_documentation/user_guide/configuration.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
.. _user_guide_global_config:

====================
Configure ``skbase``
====================

.. note::

The user guide is under development. We have created a basic
structure and are looking for contributions to develop the user guide
further.
118 changes: 117 additions & 1 deletion skbase/base/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class name: BaseEstimator
fitted state check - check_is_fitted (raises error if not is_fitted)
"""
import inspect
import re
import warnings
from collections import defaultdict
from copy import deepcopy
Expand All @@ -62,7 +63,9 @@ class name: BaseEstimator
from sklearn.base import BaseEstimator as _BaseEstimator

from skbase._exceptions import NotFittedError
from skbase.base._pretty_printing._object_html_repr import _object_html_repr
from skbase.base._tagmanager import _FlagManager
from skbase.config import get_config

__author__: List[str] = ["mloning", "RNKuhns", "fkiraly"]
__all__: List[str] = ["BaseEstimator", "BaseObject"]
Expand Down Expand Up @@ -446,7 +449,28 @@ def get_config(self):
class attribute via nested inheritance and then any overrides
and new tags from _config_dynamic object attribute.
"""
return self._get_flags(flag_attr_name="_config")
config = get_config().copy()

# Get any extension configuration interface defined in the class
# for example if downstream package wants to extend skbase to retrieve
# their own config
if hasattr(self, "__skbase_get_config__") and callable(
self.__skbase_get_config__
):
skbase_get_config_extension_dict = self.__skbase_get_config__()
else:
skbase_get_config_extension_dict = {}
if isinstance(skbase_get_config_extension_dict, dict):
config.update(skbase_get_config_extension_dict)
else:
msg = "Use of `__skbase_get_config__` to extend the interface for local "
msg += "overrides of the global configuration must return a dictionary.\n"
msg += f"But a {type(skbase_get_config_extension_dict)} was found."
warnings.warn(msg, UserWarning, stacklevel=2)
local_config = self._get_flags(flag_attr_name="_config")
config.update(local_config)

return config

def set_config(self, **config_dict):
"""Set config flags to given values.
Expand Down Expand Up @@ -682,6 +706,98 @@ def _components(self, base_class=None):

return comp_dict

def __repr__(self, n_char_max: int = 700):
    """Represent class as string.

    This follows the scikit-learn implementation for the string representation
    of parameterized objects.

    Parameters
    ----------
    n_char_max : int, default=700
        Maximum (approximate) number of non-blank characters to render. This
        can be useful in testing.

    Returns
    -------
    str
        The pretty-printed representation. If it contains more than
        ``n_char_max`` non-blank characters, the middle is replaced by
        ``"..."``, provided that actually makes the string shorter.
    """
    from skbase.base._pretty_printing._pprint import _BaseObjectPrettyPrinter

    n_max_elements_to_show = 30  # number of elements to show in sequences
    # use ellipsis for sequences with a lot of elements
    pp = _BaseObjectPrettyPrinter(
        compact=True,
        indent=1,
        indent_at_name=True,
        n_max_elements_to_show=n_max_elements_to_show,
        changed_only=get_config()["print_changed_only"],
    )

    repr_ = pp.pformat(self)

    # Use bruteforce ellipsis when there are a lot of non-blank characters
    n_nonblank = len("".join(repr_.split()))
    if n_nonblank <= n_char_max:
        return repr_

    # apprx number of chars to keep on both ends; a negative budget is
    # clamped to zero so the regex quantifier below stays well formed
    lim = max(n_char_max, 0) // 2
    regex = r"^(\s*\S){%d}" % lim
    # The regex '^(\s*\S){%d}' matches from the start of the string
    # until the nth non-blank character:
    #    - ^ matches the start of string
    #    - (pattern){n} matches n repetitions of pattern
    #    - \s*\S matches a non-blank char following zero or more blanks
    reversed_repr = repr_[::-1]
    left_match = re.match(regex, repr_)
    right_match = re.match(regex, reversed_repr)
    left_lim = left_match.end() if left_match is not None else 0
    right_lim = right_match.end() if right_match is not None else 0

    # The right-hand boundary is always expressed as an explicit index
    # (``len(repr_) - right_lim``). Slicing with ``-right_lim`` is wrong when
    # ``right_lim == 0``: ``repr_[-0:]`` is the *whole* string and
    # ``repr_[left_lim:-0]`` is empty, which would produce a "truncated"
    # repr longer than the original.
    if "\n" in repr_[left_lim : len(repr_) - right_lim]:
        # The left side and right side aren't on the same line.
        # To avoid weird cuts, e.g.:
        # categoric...ore',
        # we need to start the right side with an appropriate newline
        # character so that it renders properly as:
        # categoric...
        # handle_unknown='ignore',
        # so we add [^\n]*\n which matches until the next \n
        regex += r"[^\n]*\n"
        right_match = re.match(regex, reversed_repr)
        right_lim = right_match.end() if right_match is not None else 0

    right_start = len(repr_) - right_lim
    ellipsis = "..."
    if left_lim + len(ellipsis) < right_start:
        # Only add ellipsis if it results in a shorter repr
        repr_ = repr_[:left_lim] + ellipsis + repr_[right_start:]

    return repr_

@property
def _repr_html_(self):
    """HTML representation of BaseObject.

    This duplicates the logic of `_repr_mimebundle_`. The latter should be
    favored in the long term; `_repr_html_` is only implemented for consumers
    that do not interpret `_repr_mimebundle_`.

    Returns
    -------
    callable
        The bound method `_repr_html_inner`.

    Raises
    ------
    AttributeError
        If the `display` configuration option is not set to 'diagram'.
    """
    display_mode = get_config()["display"]
    if display_mode == "diagram":
        return self._repr_html_inner
    # Raising AttributeError from a property makes the attribute look absent.
    raise AttributeError(
        "_repr_html_ is only defined when the "
        "`display` configuration option is set to 'diagram'."
    )

def _repr_html_inner(self):
    """Return HTML representation of class.

    This method is what the `_repr_html_` property hands back, so that
    `hasattr(BaseObject, "_repr_html_")` returns `True` or `False` depending
    on `get_config()["display"]`.
    """
    html_repr = _object_html_repr(self)
    return html_repr

def _repr_mimebundle_(self, **kwargs):
    """Mime bundle used by jupyter kernels to display instances of BaseObject.

    Returns
    -------
    dict
        Always contains a "text/plain" entry holding `repr(self)`. A
        "text/html" entry is added when the `display` configuration option
        is set to 'diagram'.
    """
    bundle = {"text/plain": repr(self)}
    if get_config()["display"] != "diagram":
        return bundle
    bundle["text/html"] = _object_html_repr(self)
    return bundle


class TagAliaserMixin:
"""Mixin class for tag aliasing and deprecation of old tags.
Expand Down
11 changes: 11 additions & 0 deletions skbase/base/_pretty_printing/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env python3 -u
# -*- coding: utf-8 -*-
# copyright: skbase developers, BSD-3-Clause License (see LICENSE file)
# Many elements of this code were developed in scikit-learn. These elements
# are copyrighted by the scikit-learn developers, BSD-3-Clause License. For
# conditions see https://github.com/scikit-learn/scikit-learn/blob/main/COPYING
"""Functionality for pretty printing BaseObjects."""
from typing import List

__author__: List[str] = ["RNKuhns"]
__all__: List[str] = []
Loading