From 6dff9b6e202afc9ec755d5e4e25ec56242d8514e Mon Sep 17 00:00:00 2001 From: Siddhant Sadangi Date: Wed, 10 Apr 2024 10:58:42 +0200 Subject: [PATCH] Fix for Yellowbrick-Matplotlib incompatibility (#28) * Yellowbrick matplotlib incompatibility fix; expanded tests; replaced print with warning * Updated action versions * Added env variable for matplotlib installation * Excluding windows tests on python 3.10 * Fixed typo * Updated changelog, removed graphviz installation * Removed graphviz installation; Used "agg" as mpl backend * Monkey patched yellowbrick.regressor.CooksDistance.draw() * Changed to private method * Update __init__.py --- .github/actions/e2e/action.yml | 8 +--- .github/workflows/ci.yml | 15 +++--- CHANGELOG.md | 8 ++++ src/neptune_sklearn/impl/__init__.py | 70 +++++++++++++++++++++------- tests/test_e2e.py | 3 ++ 5 files changed, 75 insertions(+), 29 deletions(-) diff --git a/.github/actions/e2e/action.yml b/.github/actions/e2e/action.yml index 81d2472..9fd3027 100644 --- a/.github/actions/e2e/action.yml +++ b/.github/actions/e2e/action.yml @@ -19,12 +19,8 @@ runs: working-directory: ${{ inputs.working_directory }} run: pip install -e .[dev] shell: bash - - - name: Setup Graphviz - uses: ts-graphviz/setup-graphviz@v1 - with: - ubuntu-skip-apt-update: true - macos-skip-brew-update: true + env: + MPLLOCALFREETYPE: "1" - name: Run tests working-directory: ${{ inputs.working_directory }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b4d6e63..cb2090f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,9 +4,9 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v5 with: python-version: 3.9 @@ -23,11 +23,12 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: [3.9] + python-version: ["3.8", "3.10", "3.12"] + steps: - - uses: actions/checkout@v2 + - 
uses: actions/checkout@v4 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -42,9 +43,9 @@ jobs: runs-on: ubuntu-latest if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v5 with: python-version: 3.9 diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b44894..fe9189d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +## neptune-sklearn 2.1.3 + +### Fixes +- Monkey patches [`yellowbrick.regressor.CooksDistance.draw()`](https://github.com/DistrictDataLabs/yellowbrick/blob/f7a8e950bd31452ea2f5d402a1c5d519cd163fd5/yellowbrick/regressor/influence.py#L184) to remove unsupported `use_line_collection` matplotlib arg ([#28](https://github.com/neptune-ai/neptune-sklearn/pull/28)) + +### Changes +- Replaced `print()` with `warnings.warn()` to better capture `stderr` ([#28](https://github.com/neptune-ai/neptune-sklearn/pull/28)) + ## neptune-sklearn 2.1.2 ### Changes diff --git a/src/neptune_sklearn/impl/__init__.py b/src/neptune_sklearn/impl/__init__.py index e881975..d82e571 100644 --- a/src/neptune_sklearn/impl/__init__.py +++ b/src/neptune_sklearn/impl/__init__.py @@ -86,6 +86,8 @@ ) from neptune.new.utils import stringify_unsupported +from warnings import warn + def create_regressor_summary(regressor, X_train, X_test, y_train, y_test, nrows=1000, log_charts=True): """Creates scikit-learn regressor summary. @@ -455,7 +457,7 @@ def get_test_preds_proba(classifier, X_test=None, y_pred_proba=None, nrows=1000) try: y_pred_proba = classifier.predict_proba(X_test) except Exception as e: - print("This classifier does not provide predictions probabilities. Error: {}".format(e)) + warn(f"This classifier does not provide predictions probabilities. 
Error: {e}") return df = pd.DataFrame(data=y_pred_proba, columns=classifier.classes_) @@ -590,7 +592,7 @@ def create_learning_curve_chart(regressor, X_train, y_train): chart = File.as_image(fig) plt.close(fig) except Exception as e: - print("Did not log learning curve chart. Error: {}".format(e)) + warn(f"Did not log learning curve chart. Error: {e}") return chart @@ -633,7 +635,7 @@ def create_feature_importance_chart(regressor, X_train, y_train): chart = File.as_image(fig) plt.close(fig) except Exception as e: - print("Did not log feature importance chart. Error: {}".format(e)) + warn(f"Did not log feature importance chart. Error: {e}") return chart @@ -678,7 +680,7 @@ def create_residuals_chart(regressor, X_train, X_test, y_train, y_test): chart = File.as_image(fig) plt.close(fig) except Exception as e: - print("Did not log residuals chart. Error: {}".format(e)) + warn(f"Did not log residuals chart. Error: {e}") return chart @@ -723,11 +725,47 @@ def create_prediction_error_chart(regressor, X_train, X_test, y_train, y_test): chart = File.as_image(fig) plt.close(fig) except Exception as e: - print("Did not log prediction error chart. Error: {}".format(e)) + warn(f"Did not log prediction error chart. Error: {e}") return chart +def _monkey_draw(self): + """ + Monkey patches `yellowbrick.regressor.CooksDistance.draw()` + to remove unsupported matplotlib argument `use_line_collection`. + + Draws a stem plot where each stem is the Cook's Distance of the instance at the + index specified by the x axis. Optionally draws a threshold line. 
+ """ + # Draw a stem plot with the influence for each instance + _, _, baseline = self.ax.stem( + self.distance_, + linefmt=self.linefmt, + markerfmt=self.markerfmt, + # use_line_collection=True + ) + + # No padding on either side of the instance index + self.ax.set_xlim(0, len(self.distance_)) + + # Draw the threshold for most influential points + if self.draw_threshold: + label = r"{:0.2f}% > $I_t$ ($I_t=\frac {{4}} {{n}}$)".format(self.outlier_percentage_) + self.ax.axhline( + self.influence_threshold_, + ls="--", + label=label, + c=baseline.get_color(), + lw=baseline.get_linewidth(), + ) + + return self.ax + + +CooksDistance.draw = _monkey_draw + + def create_cooks_distance_chart(regressor, X_train, y_train): """Creates cooks distance chart. @@ -765,7 +803,7 @@ def create_cooks_distance_chart(regressor, X_train, y_train): chart = File.as_image(fig) plt.close(fig) except Exception as e: - print("Did not log cooks distance chart. Error: {}".format(e)) + warn(f"Did not log cooks distance chart. Error: {e}") return chart @@ -812,7 +850,7 @@ def create_classification_report_chart(classifier, X_train, X_test, y_train, y_t chart = File.as_image(fig) plt.close(fig) except Exception as e: - print("Did not log Classification Report chart. Error: {}".format(e)) + warn(f"Did not log Classification Report chart. Error: {e}") return chart @@ -859,7 +897,7 @@ def create_confusion_matrix_chart(classifier, X_train, X_test, y_train, y_test): chart = File.as_image(fig) plt.close(fig) except Exception as e: - print("Did not log Confusion Matrix chart. Error: {}".format(e)) + warn(f"Did not log Confusion Matrix chart. Error: {e}") return chart @@ -904,7 +942,7 @@ def create_roc_auc_chart(classifier, X_train, X_test, y_train, y_test): chart = File.as_image(fig) plt.close(fig) except Exception as e: - print("Did not log ROC-AUC chart. Error {}".format(e)) + warn(f"Did not log ROC-AUC chart. 
Error {e}") return chart @@ -943,9 +981,9 @@ def create_precision_recall_chart(classifier, X_test, y_test, y_pred_proba=None) try: y_pred_proba = classifier.predict_proba(X_test) except Exception as e: - print( - "Did not log Precision-Recall chart: this classifier does not provide predictions probabilities." - "Error {}".format(e) + warn( + f"""Did not log Precision-Recall chart: this classifier does not provide predictions probabilities. + Error {e}""" ) return chart @@ -955,7 +993,7 @@ def create_precision_recall_chart(classifier, X_test, y_test, y_pred_proba=None) chart = File.as_image(fig) plt.close(fig) except Exception as e: - print("Did not log Precision-Recall chart. Error {}".format(e)) + warn(f"Did not log Precision-Recall chart. Error {e}") return chart @@ -1002,7 +1040,7 @@ def create_class_prediction_error_chart(classifier, X_train, X_test, y_train, y_ chart = File.as_image(fig) plt.close(fig) except Exception as e: - print("Did not log Class Prediction Error chart. Error {}".format(e)) + warn(f"Did not log Class Prediction Error chart. Error {e}") return chart @@ -1088,7 +1126,7 @@ def create_kelbow_chart(model, X, **kwargs): chart = File.as_image(fig) plt.close(fig) except Exception as e: - print("Did not log KMeans elbow chart. Error {}".format(e)) + warn(f"Did not log KMeans elbow chart. Error {e}") return chart @@ -1140,6 +1178,6 @@ def create_silhouette_chart(model, X, **kwargs): charts.append(File.as_image(fig)) plt.close(fig) except Exception as e: - print("Did not log Silhouette Coefficients chart. Error {}".format(e)) + warn(f"Did not log Silhouette Coefficients chart. 
Error {e}") return FileSeries(charts) diff --git a/tests/test_e2e.py b/tests/test_e2e.py index d17c00c..7d4a8e4 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -6,6 +6,7 @@ except ImportError: from neptune.new import Run, init_run +import matplotlib as mpl import pytest from sklearn.cluster import KMeans from sklearn.dummy import ( @@ -16,6 +17,8 @@ import neptune_sklearn as npt_utils +mpl.use("agg") + def test_classifier_summary(iris): with init_run() as run: