From b86096032b3b57c9a8a9015540c1042bfb106719 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Wed, 29 Jul 2020 12:23:17 +0800
Subject: [PATCH] Disable feature validation by default in sklearn predict_proba.

---
 python-package/xgboost/sklearn.py |  2 +-
 tests/python/test_with_sklearn.py | 32 +++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index ef22f4309ed7..1f3033f2d29e 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -909,7 +909,7 @@ def predict(self, data, output_margin=False, ntree_limit=None,
             'Label encoder is not defined.  Returning class probability.')
         return class_probs
 
-    def predict_proba(self, data, ntree_limit=None, validate_features=True,
+    def predict_proba(self, data, ntree_limit=None, validate_features=False,
                       base_margin=None):
         """
         Predict the probability of each `data` example being of a given class.
diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py
index 06742d4ca9f2..7f62a3e83052 100644
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -888,6 +888,38 @@ def test_parameter_validation():
     assert len(output) == 0
 
 
+@pytest.mark.skipif(**tm.no_pandas())
+def test_pandas_input():
+    """Fit an isotonic calibrator on a prefit classifier using pandas input."""
+    import pandas as pd
+    from sklearn.calibration import CalibratedClassifierCV
+
+    n_rows, n_cols = 100, 6
+    rng = np.random.RandomState(1994)
+    # Every cell is 0 or 1; the first column doubles as the label.
+    values = rng.randint(low=0, high=2, size=n_rows * n_cols)
+    columns = ['status'] + ['k' + str(i) for i in range(1, n_cols)]
+    df = pd.DataFrame(values.reshape(n_rows, n_cols), columns=columns)
+
+    target = df['status']
+    train = df.drop(columns=['status'])
+
+    model = xgb.XGBClassifier()
+    model.fit(train, target)
+
+    # The model is already fitted, hence cv='prefit'.
+    calibrated = CalibratedClassifierCV(model, cv='prefit', method='isotonic')
+    calibrated.fit(train, target)
+
+    # The calibrated wrapper must still hold an XGBClassifier and expose
+    # both class labels.
+    fitted = calibrated.calibrated_classifiers_[0].base_estimator
+    assert isinstance(fitted, xgb.XGBClassifier)
+    np.testing.assert_allclose(
+        np.array(calibrated.classes_), np.array([0, 1])
+    )
+
+
 class TestBoostFromPrediction(unittest.TestCase):
     def run_boost_from_prediction(self, tree_method):
         from sklearn.datasets import load_breast_cancer