Skip to content

Commit

Permalink
rename remaining references to properties features_in_ and features_out_ (#72)
Browse files Browse the repository at this point in the history
  • Loading branch information
j-ittner authored Sep 23, 2020
1 parent 5bb0da9 commit f43fb9f
Show file tree
Hide file tree
Showing 10 changed files with 18 additions and 17 deletions.
2 changes: 1 addition & 1 deletion notebooks/Classification_with_Facet.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@
"\n",
"# run feature selection using Boruta and report those selected\n",
"boruta_pipeline.fit(X=prediab.features, y=prediab.target)\n",
"selected = boruta_pipeline.features_out.to_list()\n",
"selected = boruta_pipeline.features_original_.unique()\n",
"selected"
]
},
Expand Down
4 changes: 2 additions & 2 deletions notebooks/Predictive_Maintenance_Regression_with_Facet.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,7 @@
"source": [
"# fit pipeline and print selected features\n",
"selection_pipeline.fit(X=sample.features, y=sample.target)\n",
"print(f\"Selected features: {selection_pipeline.features_out.tolist()}\")"
"print(f\"Selected features: {selection_pipeline.features_out_.tolist()}\")"
]
},
{
Expand All @@ -578,7 +578,7 @@
"outputs": [],
"source": [
"# update FACET sample object to only those features Boruta identified as useful\n",
"sample_selected = sample.keep(features=selection_pipeline.features_out)"
"sample_selected = sample.keep(features=selection_pipeline.features_out_)"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions notebooks/Regression_Water_Drilling_Simulation_Example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -294,8 +294,8 @@
"\n",
"preprocessing_pipeline.fit(X=sample.features, y=sample.target)\n",
"\n",
"print(f\"Selected features: {list(preprocessing_pipeline.features_out)}\")\n",
"sample_selected = sample.keep(preprocessing_pipeline.features_out)"
"print(f\"Selected features: {list(preprocessing_pipeline.features_out_)}\")\n",
"sample_selected = sample.keep(preprocessing_pipeline.features_out_)"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion sphinx/source/tutorial/Classification_with_Facet.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,7 @@
"\n",
"# run feature selection using Boruta and report those selected\n",
"boruta_pipeline.fit(X=prediab.features, y=prediab.target)\n",
"selected = boruta_pipeline.features_out.to_list()\n",
"selected = boruta_pipeline.features_original_.unique()\n",
"selected"
]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@
"source": [
"# fit pipeline and print selected features\n",
"selection_pipeline.fit(X=sample.features, y=sample.target)\n",
"print(f\"Selected features: {selection_pipeline.features_out.tolist()}\")"
"print(f\"Selected features: {selection_pipeline.features_out_.tolist()}\")"
]
},
{
Expand All @@ -685,7 +685,7 @@
"outputs": [],
"source": [
"# update FACET sample object to only those features Boruta identified as useful\n",
"sample_selected = sample.keep(features=selection_pipeline.features_out)"
"sample_selected = sample.keep(features=selection_pipeline.features_out_)"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -533,8 +533,8 @@
"\n",
"preprocessing_pipeline.fit(X=sample.features, y=sample.target)\n",
"\n",
"print(f\"Selected features: {list(preprocessing_pipeline.features_out)}\")\n",
"sample_selected = sample.keep(preprocessing_pipeline.features_out)"
"print(f\"Selected features: {list(preprocessing_pipeline.features_out_)}\")\n",
"sample_selected = sample.keep(preprocessing_pipeline.features_out_)"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion src/facet/crossfit/_crossfit.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ def _fit_score(
# generate parameter objects for fitting and/or scoring each split

def _generate_parameters() -> Iterator[_FitScoreParameters]:
learner_features = pipeline.features_out
learner_features = pipeline.features_out_
n_learner_features = len(learner_features)
test_scores = do_score and not _train_scores
models = iter(lambda: None, 0) if do_fit else self.models()
Expand Down
4 changes: 2 additions & 2 deletions src/facet/inspection/_inspection.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ def features(self) -> List[str]:
The names of the features used to fit the learner pipeline explained by this
inspector.
"""
return self.crossfit.pipeline.features_out.to_list()
return self.crossfit.pipeline.features_out_.to_list()

def shap_values(self, consolidate: Optional[str] = "mean") -> pd.DataFrame:
"""
Expand Down Expand Up @@ -689,7 +689,7 @@ def _feature_matrix_to_df(self, matrix: np.ndarray) -> pd.DataFrame:
matrix_df = pd.DataFrame(
data=matrix_2d,
columns=self.shap_values().columns,
index=self.crossfit.pipeline.features_out.rename(Sample.IDX_FEATURE),
index=self.crossfit.pipeline.features_out_.rename(Sample.IDX_FEATURE),
)

assert matrix_df.shape == (n_features, n_outputs * n_features)
Expand Down
6 changes: 3 additions & 3 deletions src/facet/inspection/_shap.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def fit(self: T, crossfit: LearnerCrossfit[T_LearnerPipelineDF], **fit_params) -
self.shap_ = None

training_sample = crossfit.sample
self.feature_index_ = crossfit.pipeline.features_out.rename(Sample.IDX_FEATURE)
self.feature_index_ = crossfit.pipeline.features_out_.rename(Sample.IDX_FEATURE)
self.output_names_ = self._output_names(crossfit=crossfit)
self.sample_ = training_sample

Expand Down Expand Up @@ -230,7 +230,7 @@ def _shap_all_splits(
None
if background_dataset is None
else background_dataset.reindex(
columns=model.final_estimator.features_in, copy=False
columns=model.final_estimator.features_in_, copy=False
)
),
),
Expand Down Expand Up @@ -361,7 +361,7 @@ def _preprocessed_features(
x = model.preprocessing.transform(x)

# re-index the features to fit the sequence that was used to fit the learner
return x.reindex(columns=model.final_estimator.features_in, copy=False)
return x.reindex(columns=model.final_estimator.features_in_, copy=False)

@staticmethod
@abstractmethod
Expand Down
3 changes: 2 additions & 1 deletion test/test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,8 @@ def feature_names(best_lgbm_crossfit: LearnerCrossfit[RegressorPipelineDF]) -> S
all unique features across the models in the crossfit, after preprocessing
"""
return functools.reduce(
operator.or_, (set(model.features_out) for model in best_lgbm_crossfit.models())
operator.or_,
(set(model.features_out_) for model in best_lgbm_crossfit.models()),
)


Expand Down

0 comments on commit f43fb9f

Please sign in to comment.