Add code checks to the CI pipeline: isort, black, flake (#83)

* set up CI pipeline for code quality checks * fix PEP8 styling errors: E501, F403, C407, F541 * eliminate unnecessary function make_simple_transformer() Co-authored-by: Jan Ittner <ittner.jan@bcg.com>
BCG-X-Official · Oct 1, 2020 · 8a83d42 · 8a83d42
1 parent e79a4b4
commit 8a83d42
Show file tree

Hide file tree

Showing 28 changed files with 216 additions and 136 deletions.
diff --git a/.isort.cfg b/.isort.cfg
@@ -0,0 +1,6 @@
+[isort]
+profile=black
+src_paths=src,test
+known_local_folder=facet,test
+known_first_party=pytools,sklearndf
+known_third_party=numpy,pandas,joblib,sklearn,matplot
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,4 +1,9 @@
 repos:
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.5.4
+    hooks:
+      - id: isort
+
   - repo: https://github.com/psf/black
     rev: 20.8b1
     hooks:

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -36,6 +36,45 @@ variables:
     }}
 
 stages:
+  # Check code quality first to fail fast (isort, flake8, black)
+  - stage: code_quality_checks
+    displayName: 'Code quality'
+    jobs:
+      - job:
+        displayName: 'isort'
+        steps:
+          - task: UsePythonVersion@0
+            inputs:
+              versionSpec: '3.7.*'
+            displayName: 'use Python 3.7'
+          - script: |
+              python -m pip install isort==5.5.4
+              python -m isort --check --diff .
+            displayName: 'Run isort'
+      - job:
+        displayName: 'black'
+        steps:
+          - task: UsePythonVersion@0
+            inputs:
+              versionSpec: '3.7.*'
+            displayName: 'use Python 3.7'
+          - script: |
+              python -m pip install black==20.8b1
+              python -m black --check .
+            displayName: 'Run black'
+      - job:
+        displayName: 'flake8'
+        steps:
+          - task: UsePythonVersion@0
+            inputs:
+              versionSpec: '3.7.*'
+            displayName: 'use Python 3.7'
+          - script: |
+              python -m pip install flake8 flake8-comprehensions flake8-import-order
+              python -m flake8 --config flake8.ini -v .
+            displayName: 'Run flake8'
+
+  # detect whether the conda build config was changed -> then we must run a build test
   - stage: detect_conda_changes
     displayName: 'detect conda changes'
 
@@ -62,7 +101,7 @@ stages:
 
 
   - stage:
-    displayName: 'simple pytest'
+    displayName: 'Unit tests'
     dependsOn: 'detect_conda_changes'
     variables:
       conda_build_config_changed: $[ stageDependencies.detect_conda_changes.checkout_and_diff.outputs['diff.conda_build_config_changed'] ]
@@ -82,8 +121,8 @@ stages:
       steps:
         - task: UsePythonVersion@0
           inputs:
-            versionSpec: '3.7'
-          displayName: 'Use Python 3.7'
+            versionSpec: '3.7.*'
+          displayName: 'use Python 3.7'
 
         - checkout: self
         - checkout: sklearndf
@@ -129,14 +168,14 @@ stages:
   # - FOR RELEASE BRANCH: 3 BUILD TESTS
   # - OTHERWISE: 1 BUILD TEST
   - stage:
-    displayName: 'conda build & pytest'
+    displayName: 'Conda build & pytest'
     dependsOn: 'detect_conda_changes'
     variables:
       conda_build_config_changed: $[ stageDependencies.detect_conda_changes.checkout_and_diff.outputs['diff.conda_build_config_changed'] ]
 
     jobs:
     - job:
-      displayName: '(single)'
+      displayName: 'single'
       condition: >
         and(
         ne(variables.master_or_release, 'True'),
@@ -199,7 +238,7 @@ stages:
           displayName: "Build conda package"
 
     - job:
-      displayName: '(matrix)'
+      displayName: 'matrix'
       condition: eq(variables.master_or_release, 'True')
 
       pool:
@@ -277,4 +316,4 @@ stages:
 
               cd $(System.DefaultWorkingDirectory)/facet/
               make package
-          displayName: "Build conda package"
+          displayName: "Build & test conda package"
diff --git a/environment.yml b/environment.yml
@@ -8,6 +8,9 @@ dependencies:
   - conda-build
   - conda-verify
   - docutils
+  - flake8 = 3.8.*
+  - flake8-comprehensions = 3.2.*
+  - isort = 5.5.*
   - joblib = 0.16.*
   - jupyter >= 1.0
   - lightgbm = 3.0.*

diff --git a/flake8.ini b/flake8.ini
@@ -0,0 +1,24 @@
+[flake8]
+
+max-line-length = 88
+
+show-source = true
+
+ignore =
+    E203,  # space before : (needed for how black formats slicing)
+    W503,  # line break before binary operator
+    W504,  # line break after binary operator
+    E402,  # module level import not at top of file
+    E731,  # do not assign a lambda expression, use a def
+    E741,  # ignore not easy to read variables like i l I etc
+    C408,  # Unnecessary (dict/list/tuple) call - rewrite as a literal
+    S001,  # found modulo formatter (incorrect picks up mod operations)
+
+per-file-ignores =
+    __init__.py: F401, F403, F405
+
+exclude =
+    .eggs/*.py,
+    venv/*,
+    .venv/*,
+    .git/*
diff --git a/sphinx/source/conf.py b/sphinx/source/conf.py
@@ -21,7 +21,7 @@
     )
 )
 
-from conf_base import *
+from conf_base import set_config
 
 # ----- custom configuration -----
 

diff --git a/src/facet/__init__.py b/src/facet/__init__.py
@@ -1,18 +1,27 @@
 """
-Machine learning workflows for advanced model selection and inspection.
+Machine learning workflows for advanced model selection and
+inspection.
 
 
 Implements the following subpackages:
 
-- :mod:`facet.selection`: model selection and hyperparameter tuning
-- :mod:`facet.validation`: scikit-learn-style cross-validators for different \
+- :mod:`facet.selection`: model selection and hyperparameter
+tuning
+- :mod:`facet.validation`: scikit-learn-style cross-validators
+for different \
     bootstrapping approaches
-- :mod:`facet.crossfit`: a `crossfit` object manages multiple fits \
-    of the same learner across all splits of a cross-validator, enabling a range of \
-    approaches for model selection, inspection, and simulation/optimization
-- :mod:`facet.inspection`: explaining the interactions of a model's features \
-    with each other, and with the target variable, based on the SHAP approach
-- :mod:`facet.simulation`: simulating variables using a predictive model
+- :mod:`facet.crossfit`: a `crossfit` object manages multiple
+fits \
+    of the same learner across all splits of a cross-validator,
+    enabling a range of \
+    approaches for model selection, inspection, and simulation/
+    optimization
+- :mod:`facet.inspection`: explaining the interactions of a model's
+features \
+    with each other, and with the target variable, based on the SHAP
+    approach
+- :mod:`facet.simulation`: simulating variables using a predictive
+model
 """
 
 # todo: explain the basic workflow in the docstring:

diff --git a/src/facet/_facet.py b/src/facet/_facet.py
@@ -3,7 +3,7 @@
 """
 
 from copy import copy
-from typing import *
+from typing import Any, Collection, Iterable, List, Optional, Sequence, Set, Union
 
 import pandas as pd
 

diff --git a/src/facet/crossfit/__init__.py b/src/facet/crossfit/__init__.py
@@ -3,7 +3,8 @@
 cross-validator, as the basis for model evaluation and inspection.
 
 :class:`LearnerCrossfit` encapsulates a fully trained pipeline.
-It contains a :class:`.ModelPipelineDF` (preprocessing + estimator), a dataset given by a
+It contains a :class:`.ModelPipelineDF` (preprocessing + estimator),
+a dataset given by a
 :class:`yieldengine.Sample` object and a
 cross-validation calibration. The pipeline is fitted accordingly.
 """

diff --git a/src/facet/crossfit/_crossfit.py b/src/facet/crossfit/_crossfit.py
@@ -26,7 +26,6 @@
 from sklearn.model_selection import BaseCrossValidator
 from sklearn.utils import check_random_state
 
-from facet import Sample
 from pytools.api import AllTracker, inheritdoc
 from pytools.fit import FittableMixin
 from pytools.parallelization import ParallelizableMixin
@@ -37,6 +36,8 @@
     RegressorPipelineDF,
 )
 
+from facet import Sample
+
 log = logging.getLogger(__name__)
 
 __all__ = ["LearnerCrossfit", "Scorer"]

diff --git a/src/facet/inspection/_inspection.py b/src/facet/inspection/_inspection.py
@@ -9,18 +9,13 @@
 from scipy.cluster.hierarchy import linkage
 from scipy.spatial.distance import squareform
 
-from facet import Sample
-from facet.crossfit import LearnerCrossfit
-from facet.inspection._shap_decomposition import (
-    ShapInteractionValueDecomposer,
-    ShapValueDecomposer,
-)
 from pytools.api import AllTracker, inheritdoc
 from pytools.fit import FittableMixin
 from pytools.parallelization import ParallelizableMixin
 from pytools.viz.dendrogram import LinkageTree
 from sklearndf import ClassifierDF, LearnerDF, RegressorDF
 from sklearndf.pipeline import LearnerPipelineDF
+
 from ._explainer import TreeExplainerFactory
 from ._shap import (
     ClassifierShapInteractionValuesCalculator,
@@ -31,6 +26,12 @@
     ShapCalculator,
     ShapInteractionValuesCalculator,
 )
+from facet import Sample
+from facet.crossfit import LearnerCrossfit
+from facet.inspection._shap_decomposition import (
+    ShapInteractionValueDecomposer,
+    ShapValueDecomposer,
+)
 
 log = logging.getLogger(__name__)
 

diff --git a/src/facet/inspection/_shap.py b/src/facet/inspection/_shap.py
@@ -10,8 +10,6 @@
 import pandas as pd
 from shap.explainers.explainer import Explainer
 
-from facet import Sample
-from facet.crossfit import LearnerCrossfit
 from pytools.api import AllTracker
 from pytools.fit import FittableMixin
 from pytools.parallelization import ParallelizableMixin
@@ -20,7 +18,10 @@
     LearnerPipelineDF,
     RegressorPipelineDF,
 )
+
 from ._explainer import ExplainerFactory
+from facet import Sample
+from facet.crossfit import LearnerCrossfit
 
 log = logging.getLogger(__name__)
 
@@ -545,12 +546,12 @@ def _shap_for_split(
             )
 
         # calculate the shap interaction values; ensure the result is a list of arrays
-        shap_interaction_tensors: List[np.ndarray] = (
-            ShapCalculator._shap_tensors_to_list(
-                shap_tensors=shap_interaction_values_fn(x),
-                multi_output_type=multi_output_type,
-                multi_output_names=multi_output_names,
-            )
+        shap_interaction_tensors: List[
+            np.ndarray
+        ] = ShapCalculator._shap_tensors_to_list(
+            shap_tensors=shap_interaction_values_fn(x),
+            multi_output_type=multi_output_type,
+            multi_output_names=multi_output_names,
         )
 
         interaction_matrix_per_output: List[pd.DataFrame] = [

diff --git a/src/facet/inspection/_shap_decomposition.py b/src/facet/inspection/_shap_decomposition.py
@@ -8,10 +8,11 @@
 import numpy as np
 import pandas as pd
 
-from facet.inspection._shap import ShapCalculator, ShapInteractionValuesCalculator
 from pytools.api import AllTracker
 from pytools.fit import FittableMixin
 
+from facet.inspection._shap import ShapCalculator, ShapInteractionValuesCalculator
+
 log = logging.getLogger(__name__)
 
 __all__ = ["ShapValueDecomposer", "ShapInteractionValueDecomposer"]

diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py
@@ -28,13 +28,14 @@
 from numpy.random.mtrand import RandomState
 from sklearn.model_selection import BaseCrossValidator
 
-from facet import Sample
-from facet.crossfit import LearnerCrossfit
 from pytools.api import AllTracker, inheritdoc, to_tuple
 from pytools.fit import FittableMixin
 from pytools.parallelization import ParallelizableMixin
 from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF
 
+from facet import Sample
+from facet.crossfit import LearnerCrossfit
+
 log = logging.getLogger(__name__)
 
 __all__ = ["LearnerGrid", "LearnerEvaluation", "LearnerRanker"]
@@ -342,7 +343,8 @@ def is_fitted(self) -> bool:
     @property
     def ranking(self) -> List[LearnerEvaluation[T_LearnerPipelineDF]]:
         """
-        A list of :class:`.LearnerEvaluation` for all learners evaluated by this ranker, \
+        A list of :class:`.LearnerEvaluation` for all learners evaluated
+        by this ranker, \
             in descending order of the ranking score.
         """
         self._ensure_fitted()
@@ -390,7 +392,7 @@ def _parameters(params: Mapping[str, Iterable[Any]]) -> str:
 
         ranking = self._ranking[:max_learners] if max_learners else self._ranking
 
-        name_width = max([len(_model_name(ranked_model)) for ranked_model in ranking])
+        name_width = max(len(_model_name(ranked_model)) for ranked_model in ranking)
 
         return "\n".join(
             [

diff --git a/src/facet/simulation/_simulation.py b/src/facet/simulation/_simulation.py
@@ -28,9 +28,10 @@
     LearnerPipelineDF,
     RegressorPipelineDF,
 )
-from .partition import Partitioner
+
 from .. import Sample
 from ..crossfit import LearnerCrossfit
+from .partition import Partitioner
 
 log = logging.getLogger(__name__)
 

diff --git a/src/facet/simulation/viz/_draw.py b/src/facet/simulation/viz/_draw.py
@@ -13,10 +13,11 @@
     Union,
 )
 
-from facet.simulation import UnivariateSimulation
 from pytools.api import AllTracker
 from pytools.viz import Drawer
+
 from ._style import SimulationMatplotStyle, SimulationReportStyle, SimulationStyle
+from facet.simulation import UnivariateSimulation
 
 __all__ = ["SimulationDrawer"]
 

diff --git a/src/facet/simulation/viz/_style.py b/src/facet/simulation/viz/_style.py
@@ -150,9 +150,9 @@ def draw_uplift(
         else:
             x = partitions
         ax = self.ax
-        line_min, = ax.plot(x, values_min, color=self._COLOR_CONFIDENCE)
-        line_median, = ax.plot(x, values_median, color=self._COLOR_MEDIAN_UPLIFT)
-        line_max, = ax.plot(x, values_max, color=self._COLOR_CONFIDENCE)
+        (line_min,) = ax.plot(x, values_min, color=self._COLOR_CONFIDENCE)
+        (line_median,) = ax.plot(x, values_median, color=self._COLOR_MEDIAN_UPLIFT)
+        (line_max,) = ax.plot(x, values_max, color=self._COLOR_CONFIDENCE)
         # add a horizontal line at y=0
         line_base = ax.axhline(y=values_baseline, linewidth=0.5)
-Original file line number
+Diff line change
@@ Expand Up / @@ -21,7 +21,7 @@ @@
         )
     )
-    from conf_base import *
+    from conf_base import set_config
     # ----- custom configuration -----
@@ Expand Down @@