Skip to content

Commit

Permalink
BUILD: add support for mypy (#324)
Browse files Browse the repository at this point in the history
* BUILD: add first part of mypy support

* BUILD: introduce minor changes

* BUILD: refactor inspection methods

* BUILD: add py.typed file

* BUILD: drop python 3.6 support

* REFACTOR: streamline code for affinity matrix calculations

* API: remove unsupported method fit_predict from CandidateEstimatorDF

* FIX: call candidate.predict(), not candidate.predic()

* API: raise exception on CandidateEstimatorDF.xyz() with empty candidate

* API: make type of CandidateEstimatorDF's 'candidate' param more specific

* BUILD: apply review

* API: pass explainer + interactions flag to ExplainerJob not explainer_fn

* API: remove redundant arg explainer from __feature_affinity_matrix()

* REFACTOR: use to_list with the 'optional' flag

* BUILD: add cast for output_name

* API: make method FittableMixin.ensure_fitted public

* BUILD: update environment.yml

* BUILD: update pre-commit config to load pytools and sklearndf for mypy

* API: remove obsolete code to compute the std of affinity scores

* API: remove obsolete properties RangePartitioner.…_bound

* FIX: ensure_fitted() is now a public method

* FIX: address remaining mypy errors

Co-authored-by: Jan Ittner <ittner.jan@bcg.com>
  • Loading branch information
mtsokol and j-ittner authored Mar 7, 2022
1 parent 21db1a3 commit 0cd01d0
Show file tree
Hide file tree
Showing 17 changed files with 402 additions and 261 deletions.
10 changes: 10 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,13 @@ repos:
- id: check-added-large-files
- id: check-json
- id: check-yaml

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.931
hooks:
- id: mypy
files: src/
additional_dependencies:
- numpy>=1.22
- gamma-pytools>=2dev6,<3a
- sklearndf>=2dev3,<3a
14 changes: 14 additions & 0 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,20 @@ stages:
python -m pip install flake8==3.9.0 flake8-comprehensions flake8-import-order
python -m flake8 --config tox.ini -v .
displayName: 'Run flake8'
- job:
displayName: 'mypy'
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.7.*'
displayName: 'use Python 3.7'
- script: |
set -eux
eval "$(conda shell.bash hook)"
conda env create
conda activate $(project_name)-develop
python -m mypy src
displayName: 'Run mypy'
# detect whether the build config (pyproject.toml) was changed -> then we must run a build test
- stage: detect_build_config_changes
Expand Down
9 changes: 5 additions & 4 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ channels:
dependencies:
# run
- boruta_py ~= 0.3
- gamma-pytools >= 2.0.dev5, <3a
- gamma-pytools >= 2dev6, <3a
- joblib ~= 1.1
- lightgbm ~= 3.3
- matplotlib ~= 3.5
Expand All @@ -15,7 +15,7 @@ dependencies:
- scikit-learn ~= 1.0.2
- scipy ~= 1.8
- shap ~=0.40
- sklearndf >= 2dev2, < 3a
- sklearndf >= 2dev3, < 3a
# build/test
- black ~= 22.1
- conda-build ~= 3.21
Expand All @@ -26,20 +26,21 @@ dependencies:
- jinja2 ~= 2.11
- markupsafe < 2.1 # markupsafe 2.1 breaks support for jinja2
- m2r ~= 0.2
- mypy ~= 0.931
- pluggy ~= 0.13
- pre-commit ~= 2.17
- pydata-sphinx-theme ~= 0.7
- pytest ~= 5.4
- pytest-cov ~= 2.12
- pyyaml ~= 5.4
- sphinx ~= 4.2
- sphinx ~= 4.4
- sphinx-autodoc-typehints ~= 1.12
- toml ~= 0.10
- tox ~= 3.24
- yaml ~= 0.2
# notebooks
- jupyterlab ~= 3.2
- nbclassic ~= 0.3
- nbsphinx ~= 0.7.1
- nbsphinx ~= 0.8.8
- openpyxl ~= 3.0
- seaborn ~= 0.11
35 changes: 35 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
[mypy]
show_error_codes = True
allow_redefinition = True

[mypy-catboost.*]
; TODO remove once PEP 561 is supported
ignore_missing_imports = True

[mypy-matplotlib.*]
; TODO remove once PEP 561 is supported
ignore_missing_imports = True

[mypy-mpl_toolkits.*]
; TODO remove once PEP 561 is supported
ignore_missing_imports = True

[mypy-pandas.*]
; TODO remove once PEP 561 is supported
ignore_missing_imports = True

[mypy-pytools.*]
; TODO remove once PEP 561 is supported
ignore_missing_imports = True

[mypy-scipy.*]
; TODO remove once PEP 561 is supported
ignore_missing_imports = True

[mypy-sklearn.*]
; TODO remove once PEP 561 is supported
ignore_missing_imports = True

[mypy-shap.*]
; TODO remove once PEP 561 is supported
ignore_missing_imports = True
81 changes: 40 additions & 41 deletions src/facet/data/partition/_partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
import math
import operator as op
from abc import ABCMeta, abstractmethod
from numbers import Number
from typing import Any, Generic, Iterable, Optional, Sequence, Tuple, TypeVar
from typing import Any, Generic, Optional, Sequence, Tuple, TypeVar

import numpy as np

Expand All @@ -29,10 +28,12 @@
# Type variables
#


T_Self = TypeVar("T_Self")
T_Partitioner = TypeVar("T_Partitioner", bound="Partitioner")
T_RangePartitioner = TypeVar("T_RangePartitioner", bound="RangePartitioner")
T_CategoryPartitioner = TypeVar("T_CategoryPartitioner", bound="CategoryPartitioner")
T_Values = TypeVar("T_Values")
T_Values_Numeric = TypeVar("T_Values_Numeric", bound=Number)
T_Values_Numeric = TypeVar("T_Values_Numeric", float, int)


#
# Ensure all symbols introduced below are included in __all__
Expand All @@ -47,7 +48,7 @@


class Partitioner(
FittableMixin[Iterable[T_Values]], Generic[T_Values], metaclass=ABCMeta
FittableMixin[Sequence[T_Values]], Generic[T_Values], metaclass=ABCMeta
):
"""
Abstract base class of all partitioners.
Expand All @@ -56,7 +57,7 @@ class Partitioner(
DEFAULT_MAX_PARTITIONS = 20

#: The values representing the partitions.
_partitions: Optional[np.ndarray]
_partitions: Optional[Sequence[T_Values]]

#: The count of values allocated to each partition.
_frequencies: Optional[np.ndarray]
Expand All @@ -76,7 +77,8 @@ def __init__(self, max_partitions: Optional[int] = None) -> None:
self._partitions = None
self._frequencies = None

__init__.__doc__ = __init__.__doc__.replace(
# mypy - incorrect type inference for __doc__
__init__.__doc__ = __init__.__doc__.replace( # type: ignore
"{DEFAULT_MAX_PARTITIONS}", repr(DEFAULT_MAX_PARTITIONS)
)

Expand All @@ -88,19 +90,23 @@ def max_partitions(self) -> int:
return self._max_partitions

@property
def partitions_(self) -> np.ndarray:
def partitions_(self) -> Sequence[T_Values]:
"""
The values representing the partitions.
"""

self.ensure_fitted()
assert self._partitions is not None, "Partitioner is fitted"
return self._partitions

@property
def frequencies_(self) -> np.ndarray:
"""
The count of values allocated to each partition.
"""

self.ensure_fitted()
assert self._frequencies is not None, "Partitioner is fitted"
return self._frequencies

@property
Expand All @@ -111,7 +117,12 @@ def is_categorical(self) -> bool:
"""

@abstractmethod
def fit(self: T_Self, values: Iterable[T_Values], **fit_params: Any) -> T_Self:
def fit( # type: ignore[override]
# todo: remove 'type: ignore' once mypy correctly infers return type
self: T_Partitioner,
values: Sequence[T_Values],
**fit_params: Any,
) -> T_Partitioner:
"""
Calculate the partitioning for the given observed values.
Expand All @@ -122,7 +133,7 @@ def fit(self: T_Self, values: Iterable[T_Values], **fit_params: Any) -> T_Self:
"""

@staticmethod
def _as_non_empty_array(values: Iterable[Any]) -> np.ndarray:
def _as_non_empty_array(values: Sequence[Any]) -> np.ndarray:
# ensure arg values is a non-empty array
values = np.asarray(values)
if len(values) == 0:
Expand All @@ -148,24 +159,6 @@ def __init__(self, max_partitions: Optional[int] = None) -> None:
Sequence[Tuple[T_Values_Numeric, T_Values_Numeric]]
] = None

@property
def lower_bound(self) -> T_Values_Numeric:
"""
The lower bound of the partitioning.
``Null`` if no explicit lower bound is set.
"""
return self._lower_bound

@property
def upper_bound(self) -> T_Values_Numeric:
"""
The upper bound of the partitioning.
``Null`` if no explicit upper bound is set.
"""
return self._upper_bound

@property
def is_categorical(self) -> bool:
"""
Expand All @@ -182,26 +175,30 @@ def partition_bounds_(self) -> Sequence[Tuple[T_Values_Numeric, T_Values_Numeric
inclusive lower bound of a partition range, and y is the exclusive upper
bound of a partition range
"""

self.ensure_fitted()
assert self._partition_bounds is not None, "Partitioner is fitted"
return self._partition_bounds

@property
def partition_width_(self) -> T_Values_Numeric:
"""
The width of each partition.
"""

self.ensure_fitted()
assert self._step is not None, "Partitioner is fitted"
return self._step

# noinspection PyMissingOrEmptyDocstring,PyIncorrectDocstring
def fit(
self: T_Self,
values: Iterable[T_Values_Numeric],
def fit( # type: ignore[override]
# todo: remove 'type: ignore' once mypy correctly infers return type
self: T_RangePartitioner,
values: Sequence[T_Values_Numeric],
*,
lower_bound: Optional[T_Values_Numeric] = None,
upper_bound: Optional[T_Values_Numeric] = None,
**fit_params: Any,
) -> T_Self:
) -> T_RangePartitioner:
r"""
Calculate the partitioning for the given observed values.
Expand All @@ -220,8 +217,6 @@ def fit(
:return: ``self``
"""

self: RangePartitioner # support type hinting in PyCharm

values = self._as_non_empty_array(values)

if lower_bound is None or upper_bound is None:
Expand Down Expand Up @@ -305,10 +300,9 @@ def _ceil_step(step: float):

return min(10 ** math.ceil(math.log10(step * m)) / m for m in [1, 2, 5])

@staticmethod
@abstractmethod
def _step_size(
lower_bound: T_Values_Numeric, upper_bound: T_Values_Numeric
self, lower_bound: T_Values_Numeric, upper_bound: T_Values_Numeric
) -> T_Values_Numeric:
# Compute the step size (interval length) used in the partitions
pass
Expand Down Expand Up @@ -348,6 +342,7 @@ def _step_size(self, lower_bound: float, upper_bound: float) -> float:

@property
def _partition_center_offset(self) -> float:
assert self._step is not None, "Partitioner is fitted"
return self._step / 2


Expand Down Expand Up @@ -384,6 +379,7 @@ def _step_size(self, lower_bound: int, upper_bound: int) -> int:

@property
def _partition_center_offset(self) -> int:
assert self._step is not None, "Partitioner is fitted"
return self._step // 2


Expand All @@ -409,11 +405,14 @@ def is_categorical(self) -> bool:
return True

# noinspection PyMissingOrEmptyDocstring
def fit(self: T_Self, values: Iterable[T_Values], **fit_params: Any) -> T_Self:
def fit( # type: ignore[override]
# todo: remove 'type: ignore' once mypy correctly infers return type
self: T_CategoryPartitioner,
values: Sequence[T_Values],
**fit_params: Any,
) -> T_CategoryPartitioner:
"""[see superclass]"""

self: CategoryPartitioner # support type hinting in PyCharm

values = self._as_non_empty_array(values)

partitions, frequencies = np.unique(values, return_counts=True)
Expand Down
Loading

0 comments on commit 0cd01d0

Please sign in to comment.