From b86096032b3b57c9a8a9015540c1042bfb106719 Mon Sep 17 00:00:00 2001
From: fis <jm.yuan@outlook.com>
Date: Wed, 29 Jul 2020 12:23:17 +0800
Subject: [PATCH] Disable feature validation on sklearn predict prob.

---
Reviewer notes (this region is ignored when the patch is applied):

* The only library change is the default of `validate_features` in
  `predict_proba`, which flips from True to False. Callers that pass
  the argument explicitly see no difference.
* `test_pandas_input` fits an XGBClassifier on a pandas DataFrame with
  named columns, wraps it in CalibratedClassifierCV(cv='prefit',
  method='isotonic'), fits the wrapper on the same frame, and checks
  the wrapped estimator type and the learned classes ([0, 1]).
  Presumably the wrapper hands `predict_proba` data whose feature
  names no longer match the training frame -- confirm against the
  originating issue.
* NOTE(review): line breaks and indentation were reconstructed from a
  whitespace-collapsed copy of this patch. Verify the context-line
  indentation in the first hunk against the target tree, or apply
  with `git apply --ignore-whitespace`.

 python-package/xgboost/sklearn.py |  2 +-
 tests/python/test_with_sklearn.py | 32 +++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index ef22f4309ed7..1f3033f2d29e 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -909,7 +909,7 @@ def predict(self, data, output_margin=False, ntree_limit=None,
             'Label encoder is not defined. Returning class probability.')
         return class_probs
 
-    def predict_proba(self, data, ntree_limit=None, validate_features=True,
+    def predict_proba(self, data, ntree_limit=None, validate_features=False,
                       base_margin=None):
         """
         Predict the probability of each `data` example being of a given class.
diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py
index 06742d4ca9f2..7f62a3e83052 100644
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -888,6 +888,38 @@ def test_parameter_validation():
         assert len(output) == 0
 
 
+@pytest.mark.skipif(**tm.no_pandas())
+def test_pandas_input():
+    import pandas as pd
+    from sklearn.calibration import CalibratedClassifierCV
+    rng = np.random.RandomState(1994)
+
+    kRows = 100
+    kCols = 6
+
+    X = rng.randint(low=0, high=2, size=kRows*kCols)
+    X = X.reshape(kRows, kCols)
+
+    df = pd.DataFrame(X)
+    feature_names = []
+    for i in range(1, kCols):
+        feature_names += ['k'+str(i)]
+
+    df.columns = ['status'] + feature_names
+
+    target = df['status']
+    train = df.drop(columns=['status'])
+    model = xgb.XGBClassifier()
+    model.fit(train, target)
+    clf_isotonic = CalibratedClassifierCV(model,
+                                          cv='prefit', method='isotonic')
+    clf_isotonic.fit(train, target)
+    assert isinstance(clf_isotonic.calibrated_classifiers_[0].base_estimator,
+                      xgb.XGBClassifier)
+    np.testing.assert_allclose(np.array(clf_isotonic.classes_),
+                               np.array([0, 1]))
+
+
 class TestBoostFromPrediction(unittest.TestCase):
     def run_boost_from_prediction(self, tree_method):
         from sklearn.datasets import load_breast_cancer