Skip to content

Commit

Permalink
Remove n_pairs parameter from KhiopsRegressor
Browse files Browse the repository at this point in the history
It was never supported.
  • Loading branch information
folmos-at-orange committed Dec 19, 2024
1 parent eb1d345 commit faf9b44
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 78 deletions.
111 changes: 60 additions & 51 deletions khiops/sklearn/estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -1388,7 +1388,6 @@ class KhiopsSupervisedEstimator(KhiopsEstimator):
def __init__(
self,
n_features=100,
n_pairs=0,
n_trees=10,
specific_pairs=None,
all_possible_pairs=True,
Expand All @@ -1407,7 +1406,6 @@ def __init__(
internal_sort=internal_sort,
)
self.n_features = n_features
self.n_pairs = n_pairs
self.n_trees = n_trees
self.specific_pairs = specific_pairs
self.all_possible_pairs = all_possible_pairs
Expand Down Expand Up @@ -1497,25 +1495,6 @@ def _fit_check_params(self, ds, **kwargs):
raise TypeError(type_error_message("n_trees", self.n_trees, int))
if self.n_trees < 0:
raise ValueError("'n_trees' must be positive")
if not isinstance(self.n_pairs, int):
raise TypeError(type_error_message("n_pairs", self.n_pairs, int))
if self.n_pairs < 0:
raise ValueError("'n_pairs' must be positive")
if self.specific_pairs is not None:
if not is_list_like(self.specific_pairs):
raise TypeError(
type_error_message(
"specific_pairs", self.specific_pairs, "list-like"
)
)
else:
for pair in self.specific_pairs:
if not isinstance(pair, tuple):
raise TypeError(type_error_message(pair, pair, tuple))
if not isinstance(self.all_possible_pairs, bool):
raise TypeError(
type_error_message("all_possible_pairs", self.all_possible_pairs, bool)
)
if self.construction_rules is not None:
if not is_list_like(self.construction_rules):
raise TypeError(
Expand Down Expand Up @@ -1602,7 +1581,6 @@ def _fit_prepare_training_function_inputs(self, ds, computation_dir):

# Rename parameters to be compatible with khiops.core
kwargs["max_constructed_variables"] = kwargs.pop("n_features")
kwargs["max_pairs"] = kwargs.pop("n_pairs")
kwargs["max_trees"] = kwargs.pop("n_trees")

# Add the additional_data_tables parameter
Expand Down Expand Up @@ -1766,7 +1744,6 @@ class KhiopsPredictor(KhiopsSupervisedEstimator):
def __init__(
self,
n_features=100,
n_pairs=0,
n_trees=10,
n_selected_features=0,
n_evaluated_features=0,
Expand All @@ -1781,7 +1758,6 @@ def __init__(
):
super().__init__(
n_features=n_features,
n_pairs=n_pairs,
n_trees=n_trees,
specific_pairs=specific_pairs,
all_possible_pairs=all_possible_pairs,
Expand Down Expand Up @@ -2061,19 +2037,19 @@ def __init__(
):
super().__init__(
n_features=n_features,
n_pairs=n_pairs,
n_trees=n_trees,
n_selected_features=n_selected_features,
n_evaluated_features=n_evaluated_features,
specific_pairs=specific_pairs,
all_possible_pairs=all_possible_pairs,
construction_rules=construction_rules,
verbose=verbose,
output_dir=output_dir,
auto_sort=auto_sort,
key=key,
internal_sort=internal_sort,
)
self.n_pairs = n_pairs
self.specific_pairs = specific_pairs
self.all_possible_pairs = all_possible_pairs
self.group_target_value = group_target_value
self._khiops_model_prefix = "SNB_"
self._predicted_target_meta_data_tag = "Prediction"
Expand Down Expand Up @@ -2119,12 +2095,44 @@ def _fit_check_params(self, ds, **kwargs):
# Call parent method
super()._fit_check_params(ds, **kwargs)

# Check the pair related parameters
if not isinstance(self.n_pairs, int):
raise TypeError(type_error_message("n_pairs", self.n_pairs, int))
if self.n_pairs < 0:
raise ValueError("'n_pairs' must be positive")
if self.specific_pairs is not None:
if not is_list_like(self.specific_pairs):
raise TypeError(
type_error_message(
"specific_pairs", self.specific_pairs, "list-like"
)
)
else:
for pair in self.specific_pairs:
if not isinstance(pair, tuple):
raise TypeError(type_error_message(pair, pair, tuple))
if not isinstance(self.all_possible_pairs, bool):
raise TypeError(
type_error_message("all_possible_pairs", self.all_possible_pairs, bool)
)

# Check 'group_target_value' parameter
if not isinstance(self.group_target_value, bool):
raise TypeError(
type_error_message("group_target_value", self.group_target_value, bool)
)

def _fit_prepare_training_function_inputs(self, ds, computation_dir):
# Call the parent method
args, kwargs = super()._fit_prepare_training_function_inputs(
ds, computation_dir
)

# Rename parameters to be compatible with khiops.core
kwargs["max_pairs"] = kwargs.pop("n_pairs")

return args, kwargs

def fit(self, X, y, **kwargs):
"""Fits a Selective Naive Bayes classifier according to X, y
Expand Down Expand Up @@ -2406,27 +2414,12 @@ class KhiopsRegressor(RegressorMixin, KhiopsPredictor):
n_features : int, default 100
*Multi-table only* : Maximum number of multi-table aggregate features to
construct. See :doc:`/multi_table_primer` for more details.
n_pairs : int, default 0
Maximum number of pair features to construct. These features are a 2D grid
partition feature pair. The grid is optimized such that in each cell the target
distribution is well approximated by a constant histogram. Only pairs that are
jointly more informative than its marginals may be taken into account in the
regressor.
n_selected_features : int, default 0
Maximum number of features to be selected in the SNB predictor. If equal to
0 it selects all the features kept in the training.
n_evaluated_features : int, default 0
Maximum number of features to be evaluated in the SNB predictor training. If
equal to 0 it evaluates all informative features.
specific_pairs : list of tuple, optional
User-specified pairs as a list of 2-tuples of feature names. If a given tuple
contains only one non-empty feature name, then it generates all the pairs
containing it (within the maximum limit n_pairs). These pairs have top
priority: they are constructed first.
all_possible_pairs : bool, default True
If ``True`` tries to create all possible pairs within the limit ``n_pairs``.
Pairs specified with ``specific_pairs`` have top priority: they are constructed
first.
construction_rules : list of str, optional
Allowed rules for the automatic feature construction. If not set, it uses all
possible rules.
Expand Down Expand Up @@ -2508,12 +2501,9 @@ class KhiopsRegressor(RegressorMixin, KhiopsPredictor):
def __init__(
self,
n_features=100,
n_pairs=0,
n_trees=0,
n_selected_features=0,
n_evaluated_features=0,
specific_pairs=None,
all_possible_pairs=True,
construction_rules=None,
verbose=False,
output_dir=None,
Expand All @@ -2523,12 +2513,9 @@ def __init__(
):
super().__init__(
n_features=n_features,
n_pairs=n_pairs,
n_trees=n_trees,
n_selected_features=n_selected_features,
n_evaluated_features=n_evaluated_features,
specific_pairs=specific_pairs,
all_possible_pairs=all_possible_pairs,
construction_rules=construction_rules,
verbose=verbose,
output_dir=output_dir,
Expand Down Expand Up @@ -2820,17 +2807,17 @@ def __init__(
):
super().__init__(
n_features=n_features,
n_pairs=n_pairs,
n_trees=n_trees,
specific_pairs=specific_pairs,
all_possible_pairs=all_possible_pairs,
construction_rules=construction_rules,
verbose=verbose,
output_dir=output_dir,
auto_sort=auto_sort,
key=key,
internal_sort=internal_sort,
)
self.n_pairs = n_pairs
self.specific_pairs = specific_pairs
self.all_possible_pairs = all_possible_pairs
self.categorical_target = categorical_target
self.group_target_value = group_target_value
self.transform_type_categorical = transform_type_categorical
Expand Down Expand Up @@ -2903,6 +2890,27 @@ def _fit_check_params(self, ds, **kwargs):
# Call parent method
super()._fit_check_params(ds, **kwargs)

# Check the pair related parameters
if not isinstance(self.n_pairs, int):
raise TypeError(type_error_message("n_pairs", self.n_pairs, int))
if self.n_pairs < 0:
raise ValueError("'n_pairs' must be positive")
if self.specific_pairs is not None:
if not is_list_like(self.specific_pairs):
raise TypeError(
type_error_message(
"specific_pairs", self.specific_pairs, "list-like"
)
)
else:
for pair in self.specific_pairs:
if not isinstance(pair, tuple):
raise TypeError(type_error_message(pair, pair, tuple))
if not isinstance(self.all_possible_pairs, bool):
raise TypeError(
type_error_message("all_possible_pairs", self.all_possible_pairs, bool)
)

# Check 'transform_type_categorical' parameter
if not isinstance(self.transform_type_categorical, str):
raise TypeError(
Expand Down Expand Up @@ -3025,6 +3033,7 @@ def _fit_prepare_training_function_inputs(self, ds, computation_dir):
)
# Rename encoder parameters, delete unused ones
# to be compatible with khiops.core
kwargs["max_pairs"] = kwargs.pop("n_pairs")
kwargs["keep_initial_categorical_variables"] = kwargs["keep_initial_variables"]
kwargs["keep_initial_numerical_variables"] = kwargs.pop(
"keep_initial_variables"
Expand Down
27 changes: 0 additions & 27 deletions tests/test_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -1112,12 +1112,9 @@ def setUpClass(cls):
"field_separator": "\t",
"detect_format": False,
"header_line": True,
"max_pairs": 1,
"max_trees": 0,
"max_selected_variables": 1,
"max_evaluated_variables": 3,
"specific_pairs": [("age", "race")],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"additional_data_tables": {},
}
Expand Down Expand Up @@ -1204,12 +1201,9 @@ def setUpClass(cls):
"field_separator": "\t",
"detect_format": False,
"header_line": True,
"max_pairs": 1,
"max_trees": 0,
"max_selected_variables": 1,
"max_evaluated_variables": 3,
"specific_pairs": [("age", "race")],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"additional_data_tables": {},
}
Expand Down Expand Up @@ -1304,12 +1298,9 @@ def setUpClass(cls):
"detect_format": False,
"header_line": True,
"max_constructed_variables": 10,
"max_pairs": 1,
"max_trees": 0,
"max_selected_variables": 1,
"max_evaluated_variables": 3,
"specific_pairs": [],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"additional_data_tables": {
"SpliceJunction`SpliceJunctionDNA"
Expand Down Expand Up @@ -1414,12 +1405,9 @@ def setUpClass(cls):
"detect_format": False,
"header_line": True,
"max_constructed_variables": 10,
"max_pairs": 1,
"max_trees": 0,
"max_selected_variables": 1,
"max_evaluated_variables": 3,
"specific_pairs": [],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"log_file_path": os.path.join(
cls.output_dir, "khiops.log"
Expand Down Expand Up @@ -2405,11 +2393,8 @@ def test_parameter_transfer_regressor_fit_from_monotable_dataframe(self):
schema_type="monotable",
source_type="dataframe",
extra_estimator_kwargs={
"n_pairs": 1,
"n_selected_features": 1,
"n_evaluated_features": 3,
"specific_pairs": [("age", "race")],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
},
)
Expand All @@ -2424,11 +2409,8 @@ def test_parameter_transfer_regressor_fit_from_monotable_dataframe_with_df_y(
schema_type="monotable",
source_type="dataframe_xy",
extra_estimator_kwargs={
"n_pairs": 1,
"n_selected_features": 1,
"n_evaluated_features": 3,
"specific_pairs": [("age", "race")],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
},
)
Expand All @@ -2441,11 +2423,8 @@ def test_parameter_transfer_regressor_fit_from_monotable_file_dataset(self):
schema_type="monotable",
source_type="file_dataset",
extra_estimator_kwargs={
"n_pairs": 1,
"n_selected_features": 1,
"n_evaluated_features": 3,
"specific_pairs": [("age", "race")],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
},
)
Expand All @@ -2459,12 +2438,9 @@ def test_parameter_transfer_regressor_fit_from_multitable_dataframe(self):
source_type="dataframe",
extra_estimator_kwargs={
"n_features": 10,
"n_pairs": 1,
"n_trees": 0,
"n_selected_features": 1,
"n_evaluated_features": 3,
"specific_pairs": [],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
},
)
Expand All @@ -2478,12 +2454,9 @@ def test_parameter_transfer_regressor_fit_from_multitable_file_dataset(self):
source_type="file_dataset",
extra_estimator_kwargs={
"n_features": 10,
"n_pairs": 1,
"n_trees": 0,
"n_selected_features": 1,
"n_evaluated_features": 3,
"specific_pairs": [],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
},
)
Expand Down

0 comments on commit faf9b44

Please sign in to comment.