From 6dff9b6e202afc9ec755d5e4e25ec56242d8514e Mon Sep 17 00:00:00 2001
From: Siddhant Sadangi <siddhant.sadangi@neptune.ai>
Date: Wed, 10 Apr 2024 10:58:42 +0200
Subject: [PATCH] Fix for Yellowbrick-Matplotlib incompatibility (#28)

* Yellowbrick matplotlib incompatibility fix; expanded tests; replaced print with warning

* Updated action versions

* Added env variable for matplotlib installation

* Excluding windows tests on python 3.10

* Fixed typo

* Updated changelog, removed graphviz installation

* Removed graphviz installation; Used "agg" as mpl backend

* Monkey patched yellowbrick.regressor.CooksDistance.draw()

* Changed to private method

* Update __init__.py
---
 .github/actions/e2e/action.yml       |  8 +---
 .github/workflows/ci.yml             | 15 +++---
 CHANGELOG.md                         |  8 ++++
 src/neptune_sklearn/impl/__init__.py | 70 +++++++++++++++++++++-------
 tests/test_e2e.py                    |  3 ++
 5 files changed, 75 insertions(+), 29 deletions(-)

diff --git a/.github/actions/e2e/action.yml b/.github/actions/e2e/action.yml
index 81d2472..9fd3027 100644
--- a/.github/actions/e2e/action.yml
+++ b/.github/actions/e2e/action.yml
@@ -19,12 +19,8 @@ runs:
       working-directory: ${{ inputs.working_directory }}
       run: pip install -e .[dev]
       shell: bash
-
-    - name: Setup Graphviz
-      uses: ts-graphviz/setup-graphviz@v1
-      with:
-        ubuntu-skip-apt-update: true
-        macos-skip-brew-update: true
+      env:
+        MPLLOCALFREETYPE: "1"
 
     - name: Run tests
       working-directory: ${{ inputs.working_directory }}
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b4d6e63..cb2090f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -4,9 +4,9 @@ jobs:
   pre-commit:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
 
-      - uses: actions/setup-python@v2
+      - uses: actions/setup-python@v5
         with:
           python-version: 3.9
 
@@ -23,11 +23,12 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: [3.9]
+        python-version: ["3.8", "3.10", "3.12"]
+
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
 
-      - uses: actions/setup-python@v2
+      - uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
 
@@ -42,9 +43,9 @@ jobs:
     runs-on: ubuntu-latest
     if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
 
-      - uses: actions/setup-python@v2
+      - uses: actions/setup-python@v5
         with:
           python-version: 3.9
 
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3b44894..fe9189d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,11 @@
+## neptune-sklearn 2.1.3
+
+### Fixes
+- Monkey patches [`yellowbrick.regressor.CooksDistance.draw()`](https://github.com/DistrictDataLabs/yellowbrick/blob/f7a8e950bd31452ea2f5d402a1c5d519cd163fd5/yellowbrick/regressor/influence.py#L184) to remove unsupported `use_line_collection` matplotlib arg ([#28](https://github.com/neptune-ai/neptune-sklearn/pull/28))
+
+### Changes
+- Replaced `print()` with `warnings.warn()` to better capture `stderr` ([#28](https://github.com/neptune-ai/neptune-sklearn/pull/28))
+
 ## neptune-sklearn 2.1.2
 
 ### Changes
diff --git a/src/neptune_sklearn/impl/__init__.py b/src/neptune_sklearn/impl/__init__.py
index e881975..d82e571 100644
--- a/src/neptune_sklearn/impl/__init__.py
+++ b/src/neptune_sklearn/impl/__init__.py
@@ -86,6 +86,8 @@
     )
     from neptune.new.utils import stringify_unsupported
 
+from warnings import warn
+
 
 def create_regressor_summary(regressor, X_train, X_test, y_train, y_test, nrows=1000, log_charts=True):
     """Creates scikit-learn regressor summary.
@@ -455,7 +457,7 @@ def get_test_preds_proba(classifier, X_test=None, y_pred_proba=None, nrows=1000)
         try:
             y_pred_proba = classifier.predict_proba(X_test)
         except Exception as e:
-            print("This classifier does not provide predictions probabilities. Error: {}".format(e))
+            warn(f"This classifier does not provide predictions probabilities. Error: {e}")
             return
 
     df = pd.DataFrame(data=y_pred_proba, columns=classifier.classes_)
@@ -590,7 +592,7 @@ def create_learning_curve_chart(regressor, X_train, y_train):
         chart = File.as_image(fig)
         plt.close(fig)
     except Exception as e:
-        print("Did not log learning curve chart. Error: {}".format(e))
+        warn(f"Did not log learning curve chart. Error: {e}")
 
     return chart
 
@@ -633,7 +635,7 @@ def create_feature_importance_chart(regressor, X_train, y_train):
         chart = File.as_image(fig)
         plt.close(fig)
     except Exception as e:
-        print("Did not log feature importance chart. Error: {}".format(e))
+        warn(f"Did not log feature importance chart. Error: {e}")
 
     return chart
 
@@ -678,7 +680,7 @@ def create_residuals_chart(regressor, X_train, X_test, y_train, y_test):
         chart = File.as_image(fig)
         plt.close(fig)
     except Exception as e:
-        print("Did not log residuals chart. Error: {}".format(e))
+        warn(f"Did not log residuals chart. Error: {e}")
 
     return chart
 
@@ -723,11 +725,47 @@ def create_prediction_error_chart(regressor, X_train, X_test, y_train, y_test):
         chart = File.as_image(fig)
         plt.close(fig)
     except Exception as e:
-        print("Did not log prediction error chart. Error: {}".format(e))
+        warn(f"Did not log prediction error chart. Error: {e}")
 
     return chart
 
 
+def _monkey_draw(self):
+    """
+    Monkey patch for `yellowbrick.regressor.CooksDistance.draw()` that omits the
+    matplotlib argument `use_line_collection`, which is unsupported.
+
+    Draws a stem plot where each stem is the Cook's Distance of the instance at the
+    index specified by the x axis, optionally draws a threshold line, and returns `self.ax`.
+    """
+    # Draw a stem plot with the influence for each instance
+    _, _, baseline = self.ax.stem(
+        self.distance_,
+        linefmt=self.linefmt,
+        markerfmt=self.markerfmt,
+        # `use_line_collection=True` is deliberately not passed: matplotlib does not support it
+    )
+
+    # No padding on either side of the instance index
+    self.ax.set_xlim(0, len(self.distance_))
+
+    # Draw the threshold for most influential points
+    if self.draw_threshold:
+        label = r"{:0.2f}% > $I_t$ ($I_t=\frac {{4}} {{n}}$)".format(self.outlier_percentage_)
+        self.ax.axhline(
+            self.influence_threshold_,
+            ls="--",
+            label=label,
+            c=baseline.get_color(),
+            lw=baseline.get_linewidth(),
+        )
+
+    return self.ax
+
+
+CooksDistance.draw = _monkey_draw
+
+
 def create_cooks_distance_chart(regressor, X_train, y_train):
     """Creates cooks distance chart.
 
@@ -765,7 +803,7 @@ def create_cooks_distance_chart(regressor, X_train, y_train):
         chart = File.as_image(fig)
         plt.close(fig)
     except Exception as e:
-        print("Did not log cooks distance chart. Error: {}".format(e))
+        warn(f"Did not log cooks distance chart. Error: {e}")
 
     return chart
 
@@ -812,7 +850,7 @@ def create_classification_report_chart(classifier, X_train, X_test, y_train, y_t
         chart = File.as_image(fig)
         plt.close(fig)
     except Exception as e:
-        print("Did not log Classification Report chart. Error: {}".format(e))
+        warn(f"Did not log Classification Report chart. Error: {e}")
 
     return chart
 
@@ -859,7 +897,7 @@ def create_confusion_matrix_chart(classifier, X_train, X_test, y_train, y_test):
         chart = File.as_image(fig)
         plt.close(fig)
     except Exception as e:
-        print("Did not log Confusion Matrix chart. Error: {}".format(e))
+        warn(f"Did not log Confusion Matrix chart. Error: {e}")
 
     return chart
 
@@ -904,7 +942,7 @@ def create_roc_auc_chart(classifier, X_train, X_test, y_train, y_test):
         chart = File.as_image(fig)
         plt.close(fig)
     except Exception as e:
-        print("Did not log ROC-AUC chart. Error {}".format(e))
+        warn(f"Did not log ROC-AUC chart. Error {e}")
 
     return chart
 
@@ -943,9 +981,9 @@ def create_precision_recall_chart(classifier, X_test, y_test, y_pred_proba=None)
         try:
             y_pred_proba = classifier.predict_proba(X_test)
         except Exception as e:
-            print(
-                "Did not log Precision-Recall chart: this classifier does not provide predictions probabilities."
-                "Error {}".format(e)
+            warn(
+                "Did not log Precision-Recall chart: this classifier does not provide predictions probabilities. "
+                f"Error {e}"
+            )
             return chart
 
@@ -955,7 +993,7 @@ def create_precision_recall_chart(classifier, X_test, y_test, y_pred_proba=None)
         chart = File.as_image(fig)
         plt.close(fig)
     except Exception as e:
-        print("Did not log Precision-Recall chart. Error {}".format(e))
+        warn(f"Did not log Precision-Recall chart. Error {e}")
 
     return chart
 
@@ -1002,7 +1040,7 @@ def create_class_prediction_error_chart(classifier, X_train, X_test, y_train, y_
         chart = File.as_image(fig)
         plt.close(fig)
     except Exception as e:
-        print("Did not log Class Prediction Error chart. Error {}".format(e))
+        warn(f"Did not log Class Prediction Error chart. Error {e}")
 
     return chart
 
@@ -1088,7 +1126,7 @@ def create_kelbow_chart(model, X, **kwargs):
         chart = File.as_image(fig)
         plt.close(fig)
     except Exception as e:
-        print("Did not log KMeans elbow chart. Error {}".format(e))
+        warn(f"Did not log KMeans elbow chart. Error {e}")
 
     return chart
 
@@ -1140,6 +1178,6 @@ def create_silhouette_chart(model, X, **kwargs):
             charts.append(File.as_image(fig))
             plt.close(fig)
         except Exception as e:
-            print("Did not log Silhouette Coefficients chart. Error {}".format(e))
+            warn(f"Did not log Silhouette Coefficients chart. Error {e}")
 
     return FileSeries(charts)
diff --git a/tests/test_e2e.py b/tests/test_e2e.py
index d17c00c..7d4a8e4 100644
--- a/tests/test_e2e.py
+++ b/tests/test_e2e.py
@@ -6,6 +6,7 @@
 except ImportError:
     from neptune.new import Run, init_run
 
+import matplotlib as mpl
 import pytest
 from sklearn.cluster import KMeans
 from sklearn.dummy import (
@@ -16,6 +17,8 @@
 
 import neptune_sklearn as npt_utils
 
+mpl.use("agg")
+
 
 def test_classifier_summary(iris):
     with init_run() as run: