diff --git a/notebooks/Classification_with_Facet.ipynb b/notebooks/Classification_with_Facet.ipynb index da7687349..402b26837 100644 --- a/notebooks/Classification_with_Facet.ipynb +++ b/notebooks/Classification_with_Facet.ipynb @@ -474,7 +474,7 @@ "\n", "# run feature selection using Boruta and report those selected\n", "boruta_pipeline.fit(X=prediab.features, y=prediab.target)\n", - "selected = boruta_pipeline.features_out.to_list()\n", + "selected = boruta_pipeline.features_original_.unique()\n", "selected" ] }, diff --git a/notebooks/Predictive_Maintenance_Regression_with_Facet.ipynb b/notebooks/Predictive_Maintenance_Regression_with_Facet.ipynb index 30e0ff58a..3e63dd9d9 100644 --- a/notebooks/Predictive_Maintenance_Regression_with_Facet.ipynb +++ b/notebooks/Predictive_Maintenance_Regression_with_Facet.ipynb @@ -552,7 +552,7 @@ "source": [ "# fit pipeline and print selected features\n", "selection_pipeline.fit(X=sample.features, y=sample.target)\n", - "print(f\"Selected features: {selection_pipeline.features_out.tolist()}\")" + "print(f\"Selected features: {selection_pipeline.features_out_.tolist()}\")" ] }, { @@ -578,7 +578,7 @@ "outputs": [], "source": [ "# update FACET sample object to only those features Boruta identified as useful\n", - "sample_selected = sample.keep(features=selection_pipeline.features_out)" + "sample_selected = sample.keep(features=selection_pipeline.features_out_)" ] }, { diff --git a/notebooks/Regression_Water_Drilling_Simulation_Example.ipynb b/notebooks/Regression_Water_Drilling_Simulation_Example.ipynb index 0cd83a563..01c381b10 100644 --- a/notebooks/Regression_Water_Drilling_Simulation_Example.ipynb +++ b/notebooks/Regression_Water_Drilling_Simulation_Example.ipynb @@ -294,8 +294,8 @@ "\n", "preprocessing_pipeline.fit(X=sample.features, y=sample.target)\n", "\n", - "print(f\"Selected features: {list(preprocessing_pipeline.features_out)}\")\n", - "sample_selected = sample.keep(preprocessing_pipeline.features_out)" + "print(f\"Selected features: {list(preprocessing_pipeline.features_out_)}\")\n", + "sample_selected = sample.keep(preprocessing_pipeline.features_out_)" ] }, { diff --git a/sphinx/source/tutorial/Classification_with_Facet.ipynb b/sphinx/source/tutorial/Classification_with_Facet.ipynb index c927d1564..ab3ba645c 100644 --- a/sphinx/source/tutorial/Classification_with_Facet.ipynb +++ b/sphinx/source/tutorial/Classification_with_Facet.ipynb @@ -631,7 +631,7 @@ "\n", "# run feature selection using Boruta and report those selected\n", "boruta_pipeline.fit(X=prediab.features, y=prediab.target)\n", - "selected = boruta_pipeline.features_out.to_list()\n", + "selected = boruta_pipeline.features_original_.unique()\n", "selected" ] }, diff --git a/sphinx/source/tutorial/Predictive_Maintenance_Regression_with_Facet.ipynb b/sphinx/source/tutorial/Predictive_Maintenance_Regression_with_Facet.ipynb index 16979aec0..9572e3ea8 100644 --- a/sphinx/source/tutorial/Predictive_Maintenance_Regression_with_Facet.ipynb +++ b/sphinx/source/tutorial/Predictive_Maintenance_Regression_with_Facet.ipynb @@ -659,7 +659,7 @@ "source": [ "# fit pipeline and print selected features\n", "selection_pipeline.fit(X=sample.features, y=sample.target)\n", - "print(f\"Selected features: {selection_pipeline.features_out.tolist()}\")" + "print(f\"Selected features: {selection_pipeline.features_out_.tolist()}\")" ] }, { @@ -685,7 +685,7 @@ "outputs": [], "source": [ "# update FACET sample object to only those features Boruta identified as useful\n", - "sample_selected = sample.keep(features=selection_pipeline.features_out)" + "sample_selected = sample.keep(features=selection_pipeline.features_out_)" ] }, { diff --git a/sphinx/source/tutorial/Regression_Water_Drilling_Simulation_Example.ipynb b/sphinx/source/tutorial/Regression_Water_Drilling_Simulation_Example.ipynb index 45cbb082b..eaf15f182 100644 --- a/sphinx/source/tutorial/Regression_Water_Drilling_Simulation_Example.ipynb +++ b/sphinx/source/tutorial/Regression_Water_Drilling_Simulation_Example.ipynb @@ -533,8 +533,8 @@ "\n", "preprocessing_pipeline.fit(X=sample.features, y=sample.target)\n", "\n", - "print(f\"Selected features: {list(preprocessing_pipeline.features_out)}\")\n", - "sample_selected = sample.keep(preprocessing_pipeline.features_out)" + "print(f\"Selected features: {list(preprocessing_pipeline.features_out_)}\")\n", + "sample_selected = sample.keep(preprocessing_pipeline.features_out_)" ] }, { diff --git a/src/facet/crossfit/_crossfit.py b/src/facet/crossfit/_crossfit.py index a86289cb8..dd9348f20 100644 --- a/src/facet/crossfit/_crossfit.py +++ b/src/facet/crossfit/_crossfit.py @@ -299,7 +299,7 @@ def _fit_score( # generate parameter objects for fitting and/or scoring each split def _generate_parameters() -> Iterator[_FitScoreParameters]: - learner_features = pipeline.features_out + learner_features = pipeline.features_out_ n_learner_features = len(learner_features) test_scores = do_score and not _train_scores models = iter(lambda: None, 0) if do_fit else self.models() diff --git a/src/facet/inspection/_inspection.py b/src/facet/inspection/_inspection.py index 9ae69cd28..1512c0364 100644 --- a/src/facet/inspection/_inspection.py +++ b/src/facet/inspection/_inspection.py @@ -316,7 +316,7 @@ def features(self) -> List[str]: The names of the features used to fit the learner pipeline explained by this inspector. """ - return self.crossfit.pipeline.features_out.to_list() + return self.crossfit.pipeline.features_out_.to_list() def shap_values(self, consolidate: Optional[str] = "mean") -> pd.DataFrame: """ @@ -689,7 +689,7 @@ def _feature_matrix_to_df(self, matrix: np.ndarray) -> pd.DataFrame: matrix_df = pd.DataFrame( data=matrix_2d, columns=self.shap_values().columns, - index=self.crossfit.pipeline.features_out.rename(Sample.IDX_FEATURE), + index=self.crossfit.pipeline.features_out_.rename(Sample.IDX_FEATURE), ) assert matrix_df.shape == (n_features, n_outputs * n_features) diff --git a/src/facet/inspection/_shap.py b/src/facet/inspection/_shap.py index 2cbf5aafa..d5c7cb773 100644 --- a/src/facet/inspection/_shap.py +++ b/src/facet/inspection/_shap.py @@ -122,7 +122,7 @@ def fit(self: T, crossfit: LearnerCrossfit[T_LearnerPipelineDF], **fit_params) - self.shap_ = None training_sample = crossfit.sample - self.feature_index_ = crossfit.pipeline.features_out.rename(Sample.IDX_FEATURE) + self.feature_index_ = crossfit.pipeline.features_out_.rename(Sample.IDX_FEATURE) self.output_names_ = self._output_names(crossfit=crossfit) self.sample_ = training_sample @@ -230,7 +230,7 @@ def _shap_all_splits( None if background_dataset is None else background_dataset.reindex( - columns=model.final_estimator.features_in, copy=False + columns=model.final_estimator.features_in_, copy=False ) ), ), @@ -361,7 +361,7 @@ def _preprocessed_features( x = model.preprocessing.transform(x) # re-index the features to fit the sequence that was used to fit the learner - return x.reindex(columns=model.final_estimator.features_in, copy=False) + return x.reindex(columns=model.final_estimator.features_in_, copy=False) @staticmethod @abstractmethod diff --git a/test/test/conftest.py b/test/test/conftest.py index c65f3e738..ff9c52495 100644 --- a/test/test/conftest.py +++ b/test/test/conftest.py @@ -175,7 +175,8 @@ def feature_names(best_lgbm_crossfit: LearnerCrossfit[RegressorPipelineDF]) -> S all unique features across the models in the crossfit, after preprocessing """ return functools.reduce( - operator.or_, (set(model.features_out) for model in best_lgbm_crossfit.models()) + operator.or_, + (set(model.features_out_) for model in best_lgbm_crossfit.models()), )