Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Boosting method implementation (LightGBM) #1264

Merged
merged 10 commits into from
Jul 30, 2024
7 changes: 5 additions & 2 deletions fedot/core/operations/evaluation/boostings.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
from fedot.core.operations.evaluation.evaluation_interfaces import EvaluationStrategy
from fedot.core.operations.evaluation.operation_implementations.models.boostings_implementations import \
FedotCatBoostClassificationImplementation, FedotCatBoostRegressionImplementation, \
FedotXGBoostClassificationImplementation, FedotXGBoostRegressionImplementation
FedotXGBoostClassificationImplementation, FedotXGBoostRegressionImplementation, \
FedotLightGBMClassificationImplementation, FedotLightGBMRegressionImplementation
from fedot.core.operations.operation_parameters import OperationParameters
from fedot.core.repository.tasks import TaskTypesEnum
from fedot.utilities.random import ImplementationRandomStateHandler
Expand All @@ -15,7 +16,9 @@ class BoostingStrategy(EvaluationStrategy):
'catboost': FedotCatBoostClassificationImplementation,
'catboostreg': FedotCatBoostRegressionImplementation,
'xgboost': FedotXGBoostClassificationImplementation,
'xgboostreg': FedotXGBoostRegressionImplementation
'xgboostreg': FedotXGBoostRegressionImplementation,
'lgbm': FedotLightGBMClassificationImplementation,
'lgbmreg': FedotLightGBMRegressionImplementation
}

def __init__(self, operation_type: str, params: Optional[OperationParameters] = None):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier, CatBoostRegressor, Pool
from lightgbm import LGBMClassifier, LGBMRegressor
from lightgbm import early_stopping as lgbm_early_stopping
from matplotlib import pyplot as plt
from xgboost import XGBClassifier, XGBRegressor

Expand All @@ -20,14 +22,17 @@ class FedotXGBoostImplementation(ModelImplementation):
def __init__(self, params: Optional[OperationParameters] = None):
super().__init__(params)

self.check_and_update_params()

self.model_params = {k: v for k, v in self.params.to_dict().items() if k not in self.__operation_params}
self.model = None
self.features_names = None

def fit(self, input_data: InputData):
self.features_names = input_data.features_names

if self.params.get('enable_categorical'):
input_data = input_data.get_not_encoded_data()
self.features_names = input_data.features_names

if self.params.get('use_eval_set'):
train_input, eval_input = train_test_data_setup(input_data)
Expand Down Expand Up @@ -123,7 +128,7 @@ def predict_proba(self, input_data: InputData):
input_data = input_data.get_not_encoded_data()

input_data = self.convert_to_dataframe(input_data, self.params.get('enable_categorical'))
train_x, _ = input_data.drop(columns=['target']), input_data['target']
train_x = input_data.drop(columns=['target'])
prediction = self.model.predict_proba(train_x)
return prediction

Expand All @@ -135,8 +140,139 @@ def __init__(self, params: Optional[OperationParameters] = None):
self.model = XGBRegressor(**self.model_params)


class FedotLightGBMImplementation(ModelImplementation):
    """Common fit/predict logic shared by the LightGBM classification and regression wrappers.

    Subclasses are expected to create ``self.model`` and to set ``self.classes_``
    (``None`` for regression) before :meth:`fit` is called.
    """

    # Parameters consumed by this wrapper itself; they are filtered out of
    # ``model_params`` so they are not forwarded to the LightGBM estimator.
    __operation_params = ['n_jobs', 'use_eval_set', 'enable_categorical']

    def __init__(self, params: Optional[OperationParameters] = None):
        super().__init__(params)

        self.check_and_update_params()

        self.model_params = {k: v for k, v in self.params.to_dict().items() if k not in self.__operation_params}
        self.model = None
        self.features_names = None

    def fit(self, input_data: InputData):
        """Fit the underlying LightGBM model.

        When ``use_eval_set`` is enabled the data is split with
        ``train_test_data_setup`` and the held-out part is passed as ``eval_set``
        together with the evaluation metric and the callbacks.

        Args:
            input_data: training data.

        Returns:
            The fitted model stored in ``self.model``.
        """
        self.features_names = input_data.features_names
        identify_cats = self.params.get('enable_categorical')

        if identify_cats:
            input_data = input_data.get_not_encoded_data()

        if self.params.get('use_eval_set'):
            train_input, eval_input = train_test_data_setup(input_data)

            train_input = self.convert_to_dataframe(train_input, identify_cats=identify_cats)
            eval_input = self.convert_to_dataframe(eval_input, identify_cats=identify_cats)

            train_x, train_y = train_input.drop(columns=['target']), train_input['target']
            eval_x, eval_y = eval_input.drop(columns=['target']), eval_input['target']

            eval_metric = self.set_eval_metric(self.classes_)
            callbacks = self.update_callbacks()

            self.model.fit(
                X=train_x, y=train_y,
                eval_set=[(eval_x, eval_y)], eval_metric=eval_metric,
                callbacks=callbacks
            )

        else:
            train_data = self.convert_to_dataframe(input_data, identify_cats=identify_cats)
            train_x, train_y = train_data.drop(columns=['target']), train_data['target']

            self.model.fit(
                X=train_x, y=train_y,
            )

        return self.model

    def predict(self, input_data: InputData):
        """Return ``self.model.predict`` for the features of ``input_data``."""
        identify_cats = self.params.get('enable_categorical')

        if identify_cats:
            input_data = input_data.get_not_encoded_data()

        input_data = self.convert_to_dataframe(input_data, identify_cats=identify_cats)
        train_x = input_data.drop(columns=['target'])
        prediction = self.model.predict(train_x)

        return prediction

    def check_and_update_params(self):
        """Disable early stopping when no evaluation set is going to be used."""
        early_stopping_rounds = self.params.get('early_stopping_rounds')
        use_eval_set = self.params.get('use_eval_set')

        if isinstance(early_stopping_rounds, int) and not use_eval_set:
            self.params.update(early_stopping_rounds=False)

    def update_callbacks(self) -> list:
        """Build the list of callbacks passed to ``self.model.fit``.

        Returns:
            A list holding the early-stopping callback when a positive integer
            ``early_stopping_rounds`` is configured, otherwise an empty list.
        """
        callbacks = []

        esr = self.params.get('early_stopping_rounds')
        # ``bool`` is a subclass of ``int`` and ``check_and_update_params`` stores
        # ``False`` as the "disabled" marker, so booleans are rejected explicitly.
        if isinstance(esr, int) and not isinstance(esr, bool) and esr > 0:
            # The created callback has to be collected; otherwise it never reaches ``fit``.
            callbacks.append(lgbm_early_stopping(esr, verbose=self.params.get('verbose')))

        return callbacks

    @staticmethod
    def set_eval_metric(n_classes):
        """Choose the evaluation metric name from the collection of class labels.

        Args:
            n_classes: class labels, or ``None`` for regression.

        Returns:
            ``''`` for regression, ``'binary_logloss'`` for fewer than three
            classes, ``'multi_logloss'`` otherwise.
        """
        if n_classes is None:  # if n_classes is None -> regression
            eval_metric = ''

        elif len(n_classes) < 3:  # if n_classes < 3 -> bin class
            eval_metric = 'binary_logloss'

        else:  # else multiclass
            eval_metric = 'multi_logloss'

        return eval_metric

    @staticmethod
    def convert_to_dataframe(data: Optional[InputData], identify_cats: bool):
        """Convert ``data`` into a DataFrame of features plus a ``'target'`` column.

        Args:
            data: object providing ``features``, ``features_names``, ``target``,
                ``categorical_idx`` and ``numerical_idx``.
            identify_cats: if truthy, columns at ``categorical_idx`` are cast to
                the pandas ``'category'`` dtype.

        Returns:
            The DataFrame; columns at ``numerical_idx`` are cast to ``'float'``.
        """
        dataframe = pd.DataFrame(data=data.features, columns=data.features_names)
        dataframe['target'] = data.target

        if identify_cats and data.categorical_idx is not None:
            for col in dataframe.columns[data.categorical_idx]:
                dataframe[col] = dataframe[col].astype('category')

        if data.numerical_idx is not None:
            for col in dataframe.columns[data.numerical_idx]:
                dataframe[col] = dataframe[col].astype('float')

        return dataframe

    def plot_feature_importance(self):
        """Plot the fitted model's ``feature_importances_`` against the stored feature names."""
        plot_feature_importance(self.features_names, self.model.feature_importances_)


class FedotLightGBMClassificationImplementation(FedotLightGBMImplementation):
    """LightGBM classifier wrapper built on ``LGBMClassifier``."""

    def __init__(self, params: Optional[OperationParameters] = None):
        super().__init__(params)
        # Class labels are only known after ``fit`` has seen the target.
        self.classes_ = None
        self.model = LGBMClassifier(**self.model_params)

    def fit(self, input_data: InputData):
        """Remember the unique target labels, then delegate to the parent ``fit``."""
        targets = np.array(input_data.target)
        self.classes_ = np.unique(targets)
        return super().fit(input_data=input_data)

    def predict_proba(self, input_data: InputData):
        """Return ``self.model.predict_proba`` for the features of ``input_data``."""
        if self.params.get('enable_categorical'):
            input_data = input_data.get_not_encoded_data()

        frame = self.convert_to_dataframe(input_data, self.params.get('enable_categorical'))
        features_only = frame.drop(columns=['target'])
        return self.model.predict_proba(features_only)


class FedotLightGBMRegressionImplementation(FedotLightGBMImplementation):
    """LightGBM regressor wrapper built on ``LGBMRegressor``."""

    def __init__(self, params: Optional[OperationParameters] = None):
        super().__init__(params)
        # ``classes_`` stays ``None`` so the parent treats the task as regression
        # when it selects the evaluation metric.
        self.classes_ = None
        self.model = LGBMRegressor(**self.model_params)


class FedotCatBoostImplementation(ModelImplementation):
__operation_params = ['use_eval_set', 'n_jobs']
__operation_params = ['n_jobs', 'use_eval_set', 'enable_categorical']

def __init__(self, params: Optional[OperationParameters] = None):
super().__init__(params)
Expand All @@ -145,28 +281,35 @@ def __init__(self, params: Optional[OperationParameters] = None):

self.model_params = {k: v for k, v in self.params.to_dict().items() if k not in self.__operation_params}
self.model = None
self.features_names = None

def fit(self, input_data: InputData):
input_data = input_data.get_not_encoded_data()
self.features_names = input_data.features_names

if self.params.get('enable_categorical'):
input_data = input_data.get_not_encoded_data()

if self.params.get('use_eval_set'):
# TODO: Using this method for tuning
train_input, eval_input = train_test_data_setup(input_data)

train_input = self.convert_to_pool(train_input)
eval_input = self.convert_to_pool(eval_input)
train_input = self.convert_to_pool(train_input, identify_cats=self.params.get('enable_categorical'))
eval_input = self.convert_to_pool(eval_input, identify_cats=self.params.get('enable_categorical'))

self.model.fit(X=train_input, eval_set=eval_input)

else:
train_input = self.convert_to_pool(input_data)
train_input = self.convert_to_pool(input_data, identify_cats=self.params.get('enable_categorical'))

self.model.fit(train_input)

return self.model

def predict(self, input_data: InputData):
prediction = self.model.predict(input_data.get_not_encoded_data().features)
if self.params.get('enable_categorical'):
input_data = input_data.get_not_encoded_data()

prediction = self.model.predict(input_data.features)

return prediction

Expand All @@ -182,11 +325,11 @@ def check_and_update_params(self):
self.params.update(use_best_model=False, early_stopping_rounds=False)

@staticmethod
def convert_to_pool(data: Optional[InputData]):
def convert_to_pool(data: Optional[InputData], identify_cats: bool):
return Pool(
data=data.features,
label=data.target,
cat_features=data.categorical_idx,
cat_features=data.categorical_idx if identify_cats else None,
feature_names=data.features_names.tolist() if data.features_names is not None else None
)

Expand Down Expand Up @@ -217,7 +360,10 @@ def fit(self, input_data: InputData):
return super().fit(input_data=input_data)

def predict_proba(self, input_data: InputData):
prediction = self.model.predict_proba(input_data.get_not_encoded_data().features)
if self.params.get('enable_categorical'):
input_data = input_data.get_not_encoded_data()

prediction = self.model.predict_proba(input_data.features)
return prediction


Expand Down
63 changes: 61 additions & 2 deletions fedot/core/pipelines/tuning/search_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,10 +620,30 @@ def get_parameters_dict(self):
'hyperopt-dist': hp.uniformint,
'sampling-scope': [2, 256],
'type': 'discrete'},
'min_data_in_leaf': {
'hyperopt-dist': hp.uniformint,
'sampling-scope': [5, 100],
'type': 'discrete'},
'bagging_fraction': {
'hyperopt-dist': hp.loguniform,
'sampling-scope': [0.01, 1.0],
'type': 'continuous'},
'extra_trees': {
'hyperopt-dist': hp.choice,
'sampling-scope': [[True, False]],
'type': 'categorical'},
'learning_rate': {
'hyperopt-dist': hp.loguniform,
'sampling-scope': [0.01, 0.2],
'type': 'continuous'},
'force_col_wise': {
'hyperopt-dist': hp.choice,
'sampling-scope': [[True, False]],
'type': 'categorical'},
'force_row_wise ': {
'hyperopt-dist': hp.choice,
'sampling-scope': [[True, False]],
'type': 'categorical'},
'colsample_bytree': {
'hyperopt-dist': hp.uniform,
'sampling-scope': [0.4, 1],
Expand All @@ -639,17 +659,45 @@ def get_parameters_dict(self):
'reg_lambda': {
'hyperopt-dist': hp.loguniform,
'sampling-scope': [1e-8, 10],
'type': 'continuous'}
'type': 'continuous'},
'early_stopping_rounds': {
'hyperopt-dist': hp.uniformint,
'sampling-scope': [5, 50],
'type': 'discrete'},
},
'lgbmreg': {
'boosting_type': {
'hyperopt-dist': hp.choice,
'sampling-scope': [['gbdt', 'dart', 'goss']],
'type': 'categorical'},
'num_leaves': {
'hyperopt-dist': hp.uniformint,
'sampling-scope': [2, 256],
'type': 'discrete'},
'min_data_in_leaf': {
'hyperopt-dist': hp.uniformint,
'sampling-scope': [5, 100],
'type': 'discrete'},
'bagging_fraction': {
'hyperopt-dist': hp.loguniform,
'sampling-scope': [0.01, 1.0],
'type': 'continuous'},
'extra_trees': {
'hyperopt-dist': hp.choice,
'sampling-scope': [[True, False]],
'type': 'categorical'},
'learning_rate': {
'hyperopt-dist': hp.loguniform,
'sampling-scope': [0.01, 0.2],
'type': 'continuous'},
'force_col_wise': {
'hyperopt-dist': hp.choice,
'sampling-scope': [[True, False]],
'type': 'categorical'},
'force_row_wise ': {
'hyperopt-dist': hp.choice,
'sampling-scope': [[True, False]],
'type': 'categorical'},
'colsample_bytree': {
'hyperopt-dist': hp.uniform,
'sampling-scope': [0.4, 1],
Expand All @@ -665,7 +713,18 @@ def get_parameters_dict(self):
'reg_lambda': {
'hyperopt-dist': hp.loguniform,
'sampling-scope': [1e-8, 10],
'type': 'continuous'}
'type': 'continuous'},
'objective': {
'hyperopt-dist': hp.choice,
'sampling-scope': [
['regression', 'regression_l1', 'huber', 'fair',
'poisson', 'quantile', 'mape', 'tweedie', 'gamma']
],
'type': 'categorical'},
'early_stopping_rounds': {
'hyperopt-dist': hp.uniformint,
'sampling-scope': [5, 50],
'type': 'discrete'},
},
'catboost': {
'iterations': {
Expand Down
Loading
Loading