append '_' to names of properties available only after fitting #26

Merged
4 changes: 2 additions & 2 deletions README.rst
@@ -125,14 +125,14 @@ fit_transform on our preprocessing pipeline.
Tracing features from post-transform to original
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The sklearndf pipeline has a features_original attribute which returns a series mapping
The sklearndf pipeline has a `features_original_` attribute which returns a series mapping
the output columns (the series' index) to the input columns (the series' values).
We can therefore easily select all output features generated from a given input feature,
such as in this case for embarked.

.. code-block:: Python

embarked_type_derivatives = preprocessing_df.features_original == "embarked"
embarked_type_derivatives = preprocessing_df.features_original_ == "embarked"
transformed_df.loc[:, embarked_type_derivatives].head()


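To make the renamed attribute concrete, here is a minimal, self-contained sketch of the kind of mapping `features_original_` returns; the column names are hypothetical stand-ins for the one-hot-encoded `embarked` columns in the README example:

```python
import pandas as pd

# Hypothetical mapping of the kind returned by features_original_: the index
# holds the output (post-transform) column names, the values hold the input
# columns they were derived from.
features_original = pd.Series(
    {
        "embarked_C": "embarked",
        "embarked_Q": "embarked",
        "embarked_S": "embarked",
        "age": "age",
    },
    name="feature_in",
)

# boolean mask over the output columns, mirroring the README snippet above
embarked_type_derivatives = features_original == "embarked"
print(features_original[embarked_type_derivatives].index.to_list())
# ['embarked_C', 'embarked_Q', 'embarked_S']
```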
18 changes: 9 additions & 9 deletions sphinx/source/tutorial/sklearndf_tutorial.ipynb
@@ -639,7 +639,7 @@
"raw_mimetype": "text/restructuredtext"
},
"source": [
"The `~sklearndf.transformation.ColumnTransformerDF.features_original` attribute returns a series mapping the output columns (the series' index) to the input columns (the series' values):"
"The `~sklearndf.transformation.ColumnTransformerDF.features_original_` attribute returns a series mapping the output columns (the series' index) to the input columns (the series' values):"
]
},
{
@@ -749,7 +749,7 @@
}
],
"source": [
"preprocessing_df.features_original.to_frame().head(10)"
"preprocessing_df.features_original_.to_frame().head(10)"
]
},
{
@@ -886,7 +886,7 @@
}
],
"source": [
"garage_type_derivatives = preprocessing_df.features_original == \"GarageType\"\n",
"garage_type_derivatives = preprocessing_df.features_original_ == \"GarageType\"\n",
"\n",
"transformed_df.loc[:, garage_type_derivatives].head()"
]
@@ -1183,7 +1183,7 @@
"raw_mimetype": "text/restructuredtext"
},
"source": [
"Property `is_fitted` tells if the regressor is fitted, and -- for fitted estimators -- property `features_in` returns the names of the ingoing features as a pandas index."
"Property `is_fitted` tells if the regressor is fitted, and -- for fitted estimators -- property `features_in_` returns the names of the ingoing features as a pandas index."
]
},
{
@@ -1246,7 +1246,7 @@
}
],
"source": [
"random_forest_regressor_df.features_in"
"random_forest_regressor_df.features_in_"
]
},
{
@@ -1987,7 +1987,7 @@
}
],
"source": [
"boruta_pipeline.features_out.to_list()"
"boruta_pipeline.features_out_.to_list()"
]
},
{
@@ -1996,7 +1996,7 @@
"raw_mimetype": "text/restructuredtext"
},
"source": [
"`sklearndf` allows us to trace outgoing features back to the original features from which they were derived, using the `~sklearndf.TransformerDF.features_original` property. This is useful here as we want to know which features to eliminate before putting them into the pipeline.\n",
"`sklearndf` allows us to trace outgoing features back to the original features from which they were derived, using the `~sklearndf.TransformerDF.features_original_` property. This is useful here as we want to know which features to eliminate before putting them into the pipeline.\n",
"\n",
"In our example, feature `BsmtQual_Ex` is a derivative of feature `BsmtQual`, obtained through one-hot encoding: "
]
@@ -2130,7 +2130,7 @@
}
],
"source": [
"boruta_pipeline.features_original.to_frame()"
"boruta_pipeline.features_original_.to_frame()"
]
},
{
@@ -2160,7 +2160,7 @@
}
],
"source": [
"boruta_pipeline.features_original.unique()"
"boruta_pipeline.features_original_.unique()"
]
}
],
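The tutorial cells above rely on the same naming rule that scikit-learn itself uses: attributes that only exist after fitting end with a trailing underscore. A short sketch of the native convention for comparison (synthetic data and column names; assumes scikit-learn >= 1.0 for `feature_names_in_`):

```python
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

X = pd.DataFrame(
    {"LotArea": [8450, 9600, 11250, 9550], "YearBuilt": [2003, 1976, 2001, 1915]}
)
y = pd.Series([208500, 181500, 223500, 140000])

regressor = RandomForestRegressor(n_estimators=10, random_state=42).fit(X, y)

# scikit-learn stores fit-time attributes with a trailing underscore; the
# renamed features_in_ / n_outputs_ / classes_ properties follow the same rule
print(regressor.feature_names_in_)  # ['LotArea' 'YearBuilt']
print(regressor.n_features_in_)     # 2
```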
6 changes: 3 additions & 3 deletions src/sklearndf/__init__.py
@@ -17,11 +17,11 @@
:meth:`~TransformerDF.transform`, :meth:`~LearnerDF.predict`, and so on.

All estimators enhanced by `sklearndf` also implement an additional attribute
:attr:`~EstimatorDF.features_in`, keeping track of the column names of the data
:attr:`~EstimatorDF.features_in_`, keeping track of the column names of the data
frame used to fit the estimator.

`sklearndf` transformers also implement :attr:`~TransformerDF.features_out` and
:attr:`~TransformerDF.features_original`, keeping track of the feature names of the
`sklearndf` transformers also implement :attr:`~TransformerDF.features_out_` and
:attr:`~TransformerDF.features_original_`, keeping track of the feature names of the
transformed outputs as well as mapping output features back to the input features.
This enables tracing features back to the original inputs even across complex
pipelines (see also :class:`.PipelineDF`)
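A minimal sketch (not the sklearndf implementation) of why the trailing underscore matters: the attribute is undefined until `fit` has been called, so the name signals fit-time availability. All names below are illustrative:

```python
import pandas as pd


class TinyEstimatorDF:
    """Illustrative stand-in: features_in_ exists only after fitting."""

    def __init__(self) -> None:
        self._features_in = None

    def fit(self, X: pd.DataFrame) -> "TinyEstimatorDF":
        # remember the ingoing column names, as EstimatorDF.features_in_ does
        self._features_in = X.columns.rename("feature_in")
        return self

    @property
    def features_in_(self) -> pd.Index:
        if self._features_in is None:
            raise AttributeError("estimator is not fitted; features_in_ is undefined")
        return self._features_in


estimator = TinyEstimatorDF().fit(pd.DataFrame({"age": [22, 35], "fare": [7.3, 8.1]}))
print(estimator.features_in_.to_list())  # ['age', 'fare']
```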
19 changes: 9 additions & 10 deletions src/sklearndf/_sklearndf.py
@@ -89,7 +89,7 @@ def fit(
pass

@property
def features_in(self) -> pd.Index:
def features_in_(self) -> pd.Index:
"""
The pandas column index with the names of the features used to fit this
estimator.
@@ -100,7 +100,7 @@ def features_in(self) -> pd.Index:
return self._get_features_in().rename(self.COL_FEATURE_IN)

@property
def n_outputs(self) -> int:
def n_outputs_(self) -> int:
"""
The number of outputs used to fit this estimator.

@@ -226,7 +226,7 @@ def __init__(self, *args, **kwargs) -> None:
self._features_original = None

@property
def features_original(self) -> pd.Series:
def features_original_(self) -> pd.Series:
"""
A pandas series, mapping the output features resulting from the transformation
to the original input features.
@@ -244,7 +244,7 @@ def features_original(self) -> pd.Series:
return self._features_original

@property
def features_out(self) -> pd.Index:
def features_out_(self) -> pd.Index:
"""
A pandas column index with the names of the features produced by this
transformer
@@ -307,8 +307,8 @@ def _get_features_original(self) -> pd.Series:

def _get_features_out(self) -> pd.Index:
# return a pandas index with this transformer's output columns
# default behaviour: get index returned by features_original
return self.features_original.index
# default behaviour: get index returned by features_original_
return self.features_original_.index


class RegressorDF(LearnerDF, RegressorMixin, metaclass=ABCMeta):
@@ -393,13 +393,12 @@ def decision_function(
"""

@property
def classes(self) -> Sequence[Any]:
@abstractmethod
def classes_(self) -> Sequence[Any]:
"""
Get the classes predicted by this classifier.
By default expects classes as a list-like stored in the `classes_` attribute.

:return: the classes predicted by this classifier
"""
self._ensure_fitted()
# noinspection PyUnresolvedReferences
return self.classes_
pass
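The `classes_` change above removes the default implementation from the abstract base class and leaves only the declaration; the concrete value now comes from the wrapper (see `_wrapper.py` below). A sketch of the resulting pattern, with illustrative class names:

```python
from abc import ABCMeta, abstractmethod
from typing import Any, Sequence


class ClassifierDFSketch(metaclass=ABCMeta):
    """Stand-in for the abstract base: classes_ is declared, not implemented."""

    @property
    @abstractmethod
    def classes_(self) -> Sequence[Any]:
        """The classes predicted by this classifier."""


class FittedClassifierDFSketch(ClassifierDFSketch):
    """Stand-in for a concrete wrapper that knows the fitted class labels."""

    def __init__(self, classes: Sequence[Any]) -> None:
        self._classes = classes

    @property
    def classes_(self) -> Sequence[Any]:
        return self._classes


print(FittedClassifierDFSketch(classes=[0, 1]).classes_)  # [0, 1]
```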
19 changes: 13 additions & 6 deletions src/sklearndf/_wrapper.py
@@ -7,8 +7,8 @@
native estimators they wrap.

The wrappers also implement the additional column attributes introduced by `sklearndf`,
:meth:`~EstimatorDF.features_in`, :meth:`~TransformerDF.features_out`, and
:meth:`~TransformerDF.features_original`.
:meth:`~EstimatorDF.features_in_`, :meth:`~TransformerDF.features_out_`, and
:meth:`~TransformerDF.features_original_`.
"""

import inspect
@@ -233,7 +233,7 @@ def _check_parameter_types(
raise TypeError("arg X must be a DataFrame")
if self.is_fitted:
_EstimatorWrapperDF._verify_df(
df_name="X argument", df=X, expected_columns=self.features_in
df_name="X argument", df=X, expected_columns=self.features_in_
)
if y is not None and not isinstance(y, (pd.Series, pd.DataFrame)):
raise TypeError("arg y must be None, or a pandas Series or DataFrame")
@@ -346,7 +346,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
transformed = self._transform(X)

return self._transformed_to_df(
transformed=transformed, index=X.index, columns=self.features_out
transformed=transformed, index=X.index, columns=self.features_out_
)

# noinspection PyPep8Naming
@@ -368,7 +368,7 @@ def fit_transform(
) from cause

return self._transformed_to_df(
transformed=transformed, index=X.index, columns=self.features_out
transformed=transformed, index=X.index, columns=self.features_out_
)

# noinspection PyPep8Naming
Expand All @@ -381,7 +381,7 @@ def inverse_transform(self, X: pd.DataFrame) -> pd.DataFrame:
transformed = self._inverse_transform(X)

return self._transformed_to_df(
transformed=transformed, index=X.index, columns=self.features_in
transformed=transformed, index=X.index, columns=self.features_in_
)

def _reset_fit(self) -> None:
@@ -552,6 +552,13 @@ class _ClassifierWrapperDF(
Wrapper around sklearn classifiers that preserves data frames.
"""

@property
def classes_(self) -> Sequence[Any]:
"""[see superclass]"""
self._ensure_fitted()
# noinspection PyUnresolvedReferences
return self._delegate_estimator.classes_

# noinspection PyPep8Naming
def predict_proba(
self, X: pd.DataFrame, **predict_params
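The wrapper obtains `classes_` by delegating to the fitted native estimator. A condensed sketch of that delegation, using a plain scikit-learn classifier and illustrative names rather than the actual wrapper classes:

```python
import numpy as np
from sklearn.exceptions import NotFittedError
from sklearn.linear_model import LogisticRegression


class ClassifierWrapperSketch:
    """Forward classes_ to the wrapped native estimator once it is fitted."""

    def __init__(self, native_estimator: LogisticRegression) -> None:
        self.native_estimator = native_estimator

    @property
    def classes_(self) -> np.ndarray:
        # the native estimator only gains classes_ after fit
        if not hasattr(self.native_estimator, "classes_"):
            raise NotFittedError("classifier is not fitted")
        return self.native_estimator.classes_


X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0, 0, 1, 1])
wrapper = ClassifierWrapperSketch(LogisticRegression().fit(X, y))
print(wrapper.classes_)  # [0 1]
```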
2 changes: 1 addition & 1 deletion src/sklearndf/classification/_classification.py
@@ -629,7 +629,7 @@ def _prediction_with_class_labels(
classes: Optional[Sequence[Any]] = None,
) -> Union[pd.Series, pd.DataFrame, List[pd.DataFrame]]:
return super()._prediction_with_class_labels(
X=X, y=y, classes=range(self.n_outputs)
X=X, y=y, classes=range(self.n_outputs_)
)


19 changes: 12 additions & 7 deletions src/sklearndf/pipeline/_learner_pipeline.py
@@ -125,12 +125,12 @@ def features_out(self) -> pd.Index:
"""
Pandas column index of all features resulting from the preprocessing step.

Same as :attr:`.features_in` if the preprocessing step is ``None``.
Same as :attr:`.features_in_` if the preprocessing step is ``None``.
"""
if self.preprocessing is not None:
return self.preprocessing.features_out
return self.preprocessing.features_out_
else:
return self.features_in.rename(TransformerDF.COL_FEATURE_OUT)
return self.features_in_.rename(TransformerDF.COL_FEATURE_OUT)

@property
def is_fitted(self) -> bool:
@@ -141,15 +141,15 @@ def is_fitted(self) -> bool:

def _get_features_in(self) -> pd.Index:
if self.preprocessing is not None:
return self.preprocessing.features_in
return self.preprocessing.features_in_
else:
return self.final_estimator.features_in
return self.final_estimator.features_in_

def _get_n_outputs(self) -> int:
if self.preprocessing is not None:
return self.preprocessing.n_outputs
return self.preprocessing.n_outputs_
else:
return self.final_estimator.n_outputs
return self.final_estimator.n_outputs_

# noinspection PyPep8Naming
def _pre_transform(self, X: pd.DataFrame) -> pd.DataFrame:
@@ -282,6 +282,11 @@ def __init__(
)
self.classifier = classifier

@property
def classes_(self) -> Sequence[Any]:
"""[see superclass]"""
return self.final_estimator.classes_

@property
def final_estimator(self) -> T_FinalClassifierDF:
"""[see superclass]"""
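The pipeline change also shows the fallback logic: if there is no preprocessing step, `features_out_` is simply `features_in_` renamed to the output column label. A small sketch of that fallback with pandas (names here are illustrative):

```python
from typing import Optional

import pandas as pd

COL_FEATURE_OUT = "feature_out"


def pipeline_features_out(
    features_in: pd.Index, preprocessing_features_out: Optional[pd.Index]
) -> pd.Index:
    """Return the preprocessing output columns, or fall back to the inputs."""
    if preprocessing_features_out is not None:
        return preprocessing_features_out
    return features_in.rename(COL_FEATURE_OUT)


features_in = pd.Index(["age", "fare"], name="feature_in")
print(pipeline_features_out(features_in, None))
# Index(['age', 'fare'], dtype='object', name='feature_out')
```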
12 changes: 6 additions & 6 deletions src/sklearndf/pipeline/_pipeline.py
@@ -138,12 +138,12 @@ def _iter_not_none(

def _get_features_original(self) -> pd.Series:
col_mappings = [
df_transformer.features_original
df_transformer.features_original_
for _, df_transformer in self._transformer_steps()
]

if len(col_mappings) == 0:
_features_out: pd.Index = self.features_in
_features_out: pd.Index = self.features_in_
_features_original: Union[np.ndarray, ExtensionArray] = _features_out.values
else:
_features_out: pd.Index = col_mappings[-1].index
@@ -175,9 +175,9 @@ def _get_features_original(self) -> pd.Series:
def _get_features_out(self) -> pd.Index:
for _, transformer in reversed(self.steps):
if isinstance(transformer, TransformerDF):
return transformer.features_out
return transformer.features_out_

return self.features_in
return self.features_in_


# noinspection PyAbstractClass
@@ -217,7 +217,7 @@ def _prepend_features_original(
return pd.concat(
objs=(
_prepend_features_original(
features_original=transformer.features_original, name_prefix=name
features_original=transformer.features_original_, name_prefix=name
)
for name, transformer, _ in self.native_estimator._iter()
)
@@ -233,7 +233,7 @@ def _get_features_out(self) -> pd.Index:
# noinspection PyProtectedMember
indices = [
self._prepend_features_out(
features_out=transformer.features_out, name_prefix=name
features_out=transformer.features_out_, name_prefix=name
)
for name, transformer, _ in self.native_estimator._iter()
]
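Finally, `PipelineDF._get_features_original` chains the per-step mappings so that the last step's output columns can be traced all the way back to the pipeline's original inputs. The core idea, sketched with two hand-written mapping series (column names hypothetical):

```python
import pandas as pd

# step 1 one-hot encodes "embarked"; step 2 (e.g. feature selection) keeps
# only some of step 1's output columns
step_1 = pd.Series(
    {
        "embarked_C": "embarked",
        "embarked_Q": "embarked",
        "embarked_S": "embarked",
        "age": "age",
    },
    name="feature_in",
)
step_2 = pd.Series({"embarked_C": "embarked_C", "age": "age"}, name="feature_in")

# chain back to front: look up each of step 2's input columns in step 1's
# mapping to recover the original feature for every final output column
features_original = step_2.map(step_1)
print(features_original.to_dict())  # {'embarked_C': 'embarked', 'age': 'age'}
```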