Skip to content

Commit

Permalink
Add code checks to the CI pipeline: isort, black, flake (#83)
Browse files Browse the repository at this point in the history
* set up CI pipeline for code quality checks
* fix PEP8 styling errors: E501, F403, C407, F541
* eliminate unnecessary function make_simple_transformer()

Co-authored-by: Jan Ittner <ittner.jan@bcg.com>
  • Loading branch information
mgelsm and j-ittner authored Oct 1, 2020
1 parent e79a4b4 commit 8a83d42
Show file tree
Hide file tree
Showing 28 changed files with 216 additions and 136 deletions.
6 changes: 6 additions & 0 deletions .isort.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[isort]
profile=black
src_paths=src,test
known_local_folder=facet,test
known_first_party=pytools,sklearndf
known_third_party=numpy,pandas,joblib,sklearn,matplot
5 changes: 5 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
repos:
- repo: https://github.com/PyCQA/isort
rev: 5.5.4
hooks:
- id: isort

- repo: https://github.com/psf/black
rev: 20.8b1
hooks:
Expand Down
53 changes: 46 additions & 7 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,45 @@ variables:
}}
stages:
# Check code quality first to fail fast (isort, flake8, black)
- stage: code_quality_checks
displayName: 'Code quality'
jobs:
- job:
displayName: 'isort'
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.7.*'
displayName: 'use Python 3.7'
- script: |
python -m pip install isort==5.5.4
python -m isort --check --diff .
displayName: 'Run isort'
- job:
displayName: 'black'
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.7.*'
displayName: 'use Python 3.7'
- script: |
python -m pip install black==20.8b1
python -m black --check .
displayName: 'Run black'
- job:
displayName: 'flake8'
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.7.*'
displayName: 'use Python 3.7'
- script: |
python -m pip install flake8 flake8-comprehensions flake8-import-order
python -m flake8 --config flake8.ini -v .
displayName: 'Run flake8'
# detect whether the conda build config was changed -> then we must run a build test
- stage: detect_conda_changes
displayName: 'detect conda changes'

Expand All @@ -62,7 +101,7 @@ stages:
- stage:
displayName: 'simple pytest'
displayName: 'Unit tests'
dependsOn: 'detect_conda_changes'
variables:
conda_build_config_changed: $[ stageDependencies.detect_conda_changes.checkout_and_diff.outputs['diff.conda_build_config_changed'] ]
Expand All @@ -82,8 +121,8 @@ stages:
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.7'
displayName: 'Use Python 3.7'
versionSpec: '3.7.*'
displayName: 'use Python 3.7'

- checkout: self
- checkout: sklearndf
Expand Down Expand Up @@ -129,14 +168,14 @@ stages:
# - FOR RELEASE BRANCH: 3 BUILD TESTS
# - OTHERWISE: 1 BUILD TEST
- stage:
displayName: 'conda build & pytest'
displayName: 'Conda build & pytest'
dependsOn: 'detect_conda_changes'
variables:
conda_build_config_changed: $[ stageDependencies.detect_conda_changes.checkout_and_diff.outputs['diff.conda_build_config_changed'] ]

jobs:
- job:
displayName: '(single)'
displayName: 'single'
condition: >
and(
ne(variables.master_or_release, 'True'),
Expand Down Expand Up @@ -199,7 +238,7 @@ stages:
displayName: "Build conda package"

- job:
displayName: '(matrix)'
displayName: 'matrix'
condition: eq(variables.master_or_release, 'True')

pool:
Expand Down Expand Up @@ -277,4 +316,4 @@ stages:
cd $(System.DefaultWorkingDirectory)/facet/
make package
displayName: "Build conda package"
displayName: "Build & test conda package"
3 changes: 3 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ dependencies:
- conda-build
- conda-verify
- docutils
- flake8 = 3.8.*
- flake8-comprehensions = 3.2.*
- isort = 5.5.*
- joblib = 0.16.*
- jupyter >= 1.0
- lightgbm = 3.0.*
Expand Down
24 changes: 24 additions & 0 deletions flake8.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
[flake8]

max-line-length = 88

show-source = true

ignore =
E203, # space before : (needed for how black formats slicing)
W503, # line break before binary operator
W504, # line break after binary operator
E402, # module level import not at top of file
E731, # do not assign a lambda expression, use a def
E741, # ignore not easy to read variables like i l I etc
C408, # Unnecessary (dict/list/tuple) call - rewrite as a literal
S001, # found modulo formatter (incorrect picks up mod operations)

per-file-ignores =
__init__.py: F401, F403, F405

exclude =
.eggs/*.py,
venv/*,
.venv/*,
.git/*
2 changes: 1 addition & 1 deletion sphinx/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
)
)

from conf_base import *
from conf_base import set_config

# ----- custom configuration -----

Expand Down
27 changes: 18 additions & 9 deletions src/facet/__init__.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,27 @@
"""
Machine learning workflows for advanced model selection and inspection.
Machine learning workflows for advanced model selection and
inspection.
Implements the following subpackages:
- :mod:`facet.selection`: model selection and hyperparameter tuning
- :mod:`facet.validation`: scikit-learn-style cross-validators for different \
- :mod:`facet.selection`: model selection and hyperparameter
tuning
- :mod:`facet.validation`: scikit-learn-style cross-validators
for different \
bootstrapping approaches
- :mod:`facet.crossfit`: a `crossfit` object manages multiple fits \
of the same learner across all splits of a cross-validator, enabling a range of \
approaches for model selection, inspection, and simulation/optimization
- :mod:`facet.inspection`: explaining the interactions of a model's features \
with each other, and with the target variable, based on the SHAP approach
- :mod:`facet.simulation`: simulating variables using a predictive model
- :mod:`facet.crossfit`: a `crossfit` object manages multiple
fits \
of the same learner across all splits of a cross-validator,
enabling a range of \
approaches for model selection, inspection, and simulation/
optimization
- :mod:`facet.inspection`: explaining the interactions of a model's
features \
with each other, and with the target variable, based on the SHAP
approach
- :mod:`facet.simulation`: simulating variables using a predictive
model
"""

# todo: explain the basic workflow in the docstring:
Expand Down
2 changes: 1 addition & 1 deletion src/facet/_facet.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""

from copy import copy
from typing import *
from typing import Any, Collection, Iterable, List, Optional, Sequence, Set, Union

import pandas as pd

Expand Down
3 changes: 2 additions & 1 deletion src/facet/crossfit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
cross-validator, as the basis for model evaluation and inspection.
:class:`LearnerCrossfit` encapsulates a fully trained pipeline.
It contains a :class:`.ModelPipelineDF` (preprocessing + estimator), a dataset given by a
It contains a :class:`.ModelPipelineDF` (preprocessing + estimator),
a dataset given by a
:class:`yieldengine.Sample` object and a
cross-validation calibration. The pipeline is fitted accordingly.
"""
Expand Down
3 changes: 2 additions & 1 deletion src/facet/crossfit/_crossfit.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
from sklearn.model_selection import BaseCrossValidator
from sklearn.utils import check_random_state

from facet import Sample
from pytools.api import AllTracker, inheritdoc
from pytools.fit import FittableMixin
from pytools.parallelization import ParallelizableMixin
Expand All @@ -37,6 +36,8 @@
RegressorPipelineDF,
)

from facet import Sample

log = logging.getLogger(__name__)

__all__ = ["LearnerCrossfit", "Scorer"]
Expand Down
13 changes: 7 additions & 6 deletions src/facet/inspection/_inspection.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,13 @@
from scipy.cluster.hierarchy import linkage
from scipy.spatial.distance import squareform

from facet import Sample
from facet.crossfit import LearnerCrossfit
from facet.inspection._shap_decomposition import (
ShapInteractionValueDecomposer,
ShapValueDecomposer,
)
from pytools.api import AllTracker, inheritdoc
from pytools.fit import FittableMixin
from pytools.parallelization import ParallelizableMixin
from pytools.viz.dendrogram import LinkageTree
from sklearndf import ClassifierDF, LearnerDF, RegressorDF
from sklearndf.pipeline import LearnerPipelineDF

from ._explainer import TreeExplainerFactory
from ._shap import (
ClassifierShapInteractionValuesCalculator,
Expand All @@ -31,6 +26,12 @@
ShapCalculator,
ShapInteractionValuesCalculator,
)
from facet import Sample
from facet.crossfit import LearnerCrossfit
from facet.inspection._shap_decomposition import (
ShapInteractionValueDecomposer,
ShapValueDecomposer,
)

log = logging.getLogger(__name__)

Expand Down
17 changes: 9 additions & 8 deletions src/facet/inspection/_shap.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
import pandas as pd
from shap.explainers.explainer import Explainer

from facet import Sample
from facet.crossfit import LearnerCrossfit
from pytools.api import AllTracker
from pytools.fit import FittableMixin
from pytools.parallelization import ParallelizableMixin
Expand All @@ -20,7 +18,10 @@
LearnerPipelineDF,
RegressorPipelineDF,
)

from ._explainer import ExplainerFactory
from facet import Sample
from facet.crossfit import LearnerCrossfit

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -545,12 +546,12 @@ def _shap_for_split(
)

# calculate the shap interaction values; ensure the result is a list of arrays
shap_interaction_tensors: List[np.ndarray] = (
ShapCalculator._shap_tensors_to_list(
shap_tensors=shap_interaction_values_fn(x),
multi_output_type=multi_output_type,
multi_output_names=multi_output_names,
)
shap_interaction_tensors: List[
np.ndarray
] = ShapCalculator._shap_tensors_to_list(
shap_tensors=shap_interaction_values_fn(x),
multi_output_type=multi_output_type,
multi_output_names=multi_output_names,
)

interaction_matrix_per_output: List[pd.DataFrame] = [
Expand Down
3 changes: 2 additions & 1 deletion src/facet/inspection/_shap_decomposition.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
import numpy as np
import pandas as pd

from facet.inspection._shap import ShapCalculator, ShapInteractionValuesCalculator
from pytools.api import AllTracker
from pytools.fit import FittableMixin

from facet.inspection._shap import ShapCalculator, ShapInteractionValuesCalculator

log = logging.getLogger(__name__)

__all__ = ["ShapValueDecomposer", "ShapInteractionValueDecomposer"]
Expand Down
10 changes: 6 additions & 4 deletions src/facet/selection/_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,14 @@
from numpy.random.mtrand import RandomState
from sklearn.model_selection import BaseCrossValidator

from facet import Sample
from facet.crossfit import LearnerCrossfit
from pytools.api import AllTracker, inheritdoc, to_tuple
from pytools.fit import FittableMixin
from pytools.parallelization import ParallelizableMixin
from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF

from facet import Sample
from facet.crossfit import LearnerCrossfit

log = logging.getLogger(__name__)

__all__ = ["LearnerGrid", "LearnerEvaluation", "LearnerRanker"]
Expand Down Expand Up @@ -342,7 +343,8 @@ def is_fitted(self) -> bool:
@property
def ranking(self) -> List[LearnerEvaluation[T_LearnerPipelineDF]]:
"""
A list of :class:`.LearnerEvaluation` for all learners evaluated by this ranker, \
A list of :class:`.LearnerEvaluation` for all learners evaluated
by this ranker, \
in descending order of the ranking score.
"""
self._ensure_fitted()
Expand Down Expand Up @@ -390,7 +392,7 @@ def _parameters(params: Mapping[str, Iterable[Any]]) -> str:

ranking = self._ranking[:max_learners] if max_learners else self._ranking

name_width = max([len(_model_name(ranked_model)) for ranked_model in ranking])
name_width = max(len(_model_name(ranked_model)) for ranked_model in ranking)

return "\n".join(
[
Expand Down
3 changes: 2 additions & 1 deletion src/facet/simulation/_simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@
LearnerPipelineDF,
RegressorPipelineDF,
)
from .partition import Partitioner

from .. import Sample
from ..crossfit import LearnerCrossfit
from .partition import Partitioner

log = logging.getLogger(__name__)

Expand Down
3 changes: 2 additions & 1 deletion src/facet/simulation/viz/_draw.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@
Union,
)

from facet.simulation import UnivariateSimulation
from pytools.api import AllTracker
from pytools.viz import Drawer

from ._style import SimulationMatplotStyle, SimulationReportStyle, SimulationStyle
from facet.simulation import UnivariateSimulation

__all__ = ["SimulationDrawer"]

Expand Down
6 changes: 3 additions & 3 deletions src/facet/simulation/viz/_style.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,9 @@ def draw_uplift(
else:
x = partitions
ax = self.ax
line_min, = ax.plot(x, values_min, color=self._COLOR_CONFIDENCE)
line_median, = ax.plot(x, values_median, color=self._COLOR_MEDIAN_UPLIFT)
line_max, = ax.plot(x, values_max, color=self._COLOR_CONFIDENCE)
(line_min,) = ax.plot(x, values_min, color=self._COLOR_CONFIDENCE)
(line_median,) = ax.plot(x, values_median, color=self._COLOR_MEDIAN_UPLIFT)
(line_max,) = ax.plot(x, values_max, color=self._COLOR_CONFIDENCE)
# add a horizontal line at y=0
line_base = ax.axhline(y=values_baseline, linewidth=0.5)

Expand Down
Loading

0 comments on commit 8a83d42

Please sign in to comment.