From fd632e6155af182882acead8e7a6202d4e2881eb Mon Sep 17 00:00:00 2001
From: Alex
Date: Fri, 12 Jun 2020 11:11:47 +0800
Subject: [PATCH 1/5] add new attribute for number of features

Fixes issue related to #17353 in scikit-learn.
---
 python-package/xgboost/sklearn.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index 49f4dfa451d3..c0e69778befd 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -499,6 +499,7 @@ def fit(self, X, y, sample_weight=None, base_margin=None,
 
                 [xgb.callback.reset_learning_rate(custom_rates)]
         """
+        self.n_features_in_ = X.shape[1]
         train_dmatrix = DMatrix(data=X, label=y, weight=sample_weight,
                                 base_margin=base_margin,
                                 missing=self.missing,
@@ -813,6 +814,7 @@ def fit(self, X, y, sample_weight=None, base_margin=None,
             raise ValueError(
                 'Please reshape the input data X into 2-dimensional matrix.')
         self._features_count = X.shape[1]
+        self.n_features_in_ = self._features_count
         train_dmatrix = DMatrix(X, label=training_labels, weight=sample_weight,
                                 base_margin=base_margin,
                                 missing=self.missing, nthread=self.n_jobs)

From 6e5fac6b7c30b816e880b1e5ba04376fd61bc147 Mon Sep 17 00:00:00 2001
From: a-wozniakowski
Date: Fri, 12 Jun 2020 17:20:06 +0800
Subject: [PATCH 2/5] add new attribute for number of features

add n_features_in_ attribute and stacking tests
---
 python-package/xgboost/sklearn.py |  5 ++++
 tests/python/test_with_sklearn.py | 50 +++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+)

diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index c0e69778befd..7b72098d660b 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -500,6 +500,7 @@ def fit(self, X, y, sample_weight=None, base_margin=None,
                 [xgb.callback.reset_learning_rate(custom_rates)]
         """
         self.n_features_in_ = X.shape[1]
+
         train_dmatrix = DMatrix(data=X, label=y, weight=sample_weight,
                                 base_margin=base_margin,
                                 missing=self.missing,
@@ -813,8 +814,10 @@ def fit(self, X, y, sample_weight=None, base_margin=None,
             # different ways of reshaping
             raise ValueError(
                 'Please reshape the input data X into 2-dimensional matrix.')
+
         self._features_count = X.shape[1]
         self.n_features_in_ = self._features_count
+
         train_dmatrix = DMatrix(X, label=training_labels, weight=sample_weight,
                                 base_margin=base_margin,
                                 missing=self.missing, nthread=self.n_jobs)
@@ -1197,6 +1200,8 @@ def _dmat_init(group, **params):
             ret.set_group(group)
             return ret
 
+        self.n_features_in_ = X.shape[1]
+
         train_dmatrix = DMatrix(data=X, label=y, weight=sample_weight,
                                 base_margin=base_margin,
                                 missing=self.missing, nthread=self.n_jobs)
diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py
index 439d89afe826..af113df075b7 100644
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -115,6 +115,56 @@ def test_ranking():
     np.testing.assert_almost_equal(pred, pred_orig)
 
 
+def test_stacking_regression():
+    from sklearn.model_selection import train_test_split
+    from sklearn.datasets import load_diabetes
+    from sklearn.linear_model import RidgeCV
+    from sklearn.ensemble import RandomForestRegressor
+    from sklearn.ensemble import StackingRegressor
+
+    X, y = load_diabetes(return_X_y=True)
+    estimators = [
+        ('gbm', xgb.XGBRegressor(objective='reg:squarederror')), ('lr', RidgeCV())
+    ]
+    reg = StackingRegressor(
+        estimators=estimators,
+        final_estimator=RandomForestRegressor(n_estimators=10,
+                                              random_state=42)
+    )
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
+    reg.fit(X_train, y_train).score(X_test, y_test)
+
+    # test number of input features
+    assert reg.n_features_in_ == 10
+
+
+def test_stacking_classification():
+    from sklearn.model_selection import train_test_split
+    from sklearn.datasets import load_iris
+    from sklearn.svm import LinearSVC
+    from sklearn.linear_model import LogisticRegression
+    from sklearn.preprocessing import StandardScaler
+    from sklearn.pipeline import make_pipeline
+    from sklearn.ensemble import StackingClassifier
+
+    X, y = load_iris(return_X_y=True)
+    estimators = [
+        ('gbm', xgb.XGBClassifier()),
+        ('svr', make_pipeline(StandardScaler(),
+                              LinearSVC(random_state=42)))
+    ]
+    clf = StackingClassifier(
+        estimators=estimators, final_estimator=LogisticRegression()
+    )
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
+    clf.fit(X_train, y_train).score(X_test, y_test)
+
+    # test number of input features
+    assert clf.n_features_in_ == 4
+
+
 @pytest.mark.skipif(**tm.no_pandas())
 def test_feature_importances_weight():
     from sklearn.datasets import load_digits

From 2acea9de99af41622af08487af0dcbe35e7969e4 Mon Sep 17 00:00:00 2001
From: a-wozniakowski
Date: Fri, 12 Jun 2020 17:53:34 +0800
Subject: [PATCH 3/5] add new attribute for number of features

---
 python-package/xgboost/sklearn.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index 7b72098d660b..ef22f4309ed7 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -500,7 +500,7 @@ def fit(self, X, y, sample_weight=None, base_margin=None,
                 [xgb.callback.reset_learning_rate(custom_rates)]
         """
         self.n_features_in_ = X.shape[1]
-
+
         train_dmatrix = DMatrix(data=X, label=y, weight=sample_weight,
                                 base_margin=base_margin,
                                 missing=self.missing,
@@ -814,7 +814,7 @@ def fit(self, X, y, sample_weight=None, base_margin=None,
             # different ways of reshaping
             raise ValueError(
                 'Please reshape the input data X into 2-dimensional matrix.')
-
+
         self._features_count = X.shape[1]
         self.n_features_in_ = self._features_count
 

From 2ca1f71ddb047f5541f6fed36bdc9376794a3824 Mon Sep 17 00:00:00 2001
From: a-wozniakowski
Date: Fri, 12 Jun 2020 18:50:02 +0800
Subject: [PATCH 4/5] update stacking tests

---
 tests/python/test_with_sklearn.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py
index af113df075b7..fc052cb57559 100644
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -124,7 +124,8 @@ def test_stacking_regression():
 
     X, y = load_diabetes(return_X_y=True)
     estimators = [
-        ('gbm', xgb.XGBRegressor(objective='reg:squarederror')), ('lr', RidgeCV())
+        ('gbm', xgb.sklearn.XGBRegressor(objective='reg:squarederror')),
+        ('lr', RidgeCV())
     ]
     reg = StackingRegressor(
         estimators=estimators,
@@ -150,7 +151,7 @@ def test_stacking_classification():
 
     X, y = load_iris(return_X_y=True)
     estimators = [
-        ('gbm', xgb.XGBClassifier()),
+        ('gbm', xgb.sklearn.XGBClassifier()),
         ('svr', make_pipeline(StandardScaler(),
                               LinearSVC(random_state=42)))
     ]

From eaf2b3e98f90eb30b0917a295b8568e51a0e56af Mon Sep 17 00:00:00 2001
From: a-wozniakowski
Date: Fri, 12 Jun 2020 19:54:08 +0800
Subject: [PATCH 5/5] update stacking tests

---
 tests/python/test_with_sklearn.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py
index fc052cb57559..0bb5b3ada000 100644
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -136,9 +136,6 @@ def test_stacking_regression():
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
     reg.fit(X_train, y_train).score(X_test, y_test)
 
-    # test number of input features
-    assert reg.n_features_in_ == 10
-
 
 def test_stacking_classification():
     from sklearn.model_selection import train_test_split
@@ -162,9 +159,6 @@ def test_stacking_classification():
     X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
     clf.fit(X_train, y_train).score(X_test, y_test)
 
-    # test number of input features
-    assert clf.n_features_in_ == 4
-
 
 @pytest.mark.skipif(**tm.no_pandas())
 def test_feature_importances_weight():
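
For reference, a minimal usage sketch of the attribute this series introduces. It is not part of any patch above; it assumes the xgboost scikit-learn wrapper with the patches applied, and mirrors the check that the earlier test revisions performed explicitly (load_diabetes exposes 10 feature columns):

    import xgboost as xgb
    from sklearn.datasets import load_diabetes

    X, y = load_diabetes(return_X_y=True)

    # fit() records X.shape[1] as n_features_in_, per the patches above,
    # matching the scikit-learn estimator convention.
    reg = xgb.XGBRegressor(objective='reg:squarederror')
    reg.fit(X, y)
    assert reg.n_features_in_ == 10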