aimclub · aPovidlo · Jul 30, 2024 · Mar 5, 2024 · Mar 5, 2024 · Mar 5, 2024
diff --git a/fedot/core/operations/evaluation/boostings.py b/fedot/core/operations/evaluation/boostings.py
@@ -4,7 +4,8 @@
 from fedot.core.operations.evaluation.evaluation_interfaces import EvaluationStrategy
 from fedot.core.operations.evaluation.operation_implementations.models.boostings_implementations import \
     FedotCatBoostClassificationImplementation, FedotCatBoostRegressionImplementation, \
-    FedotXGBoostClassificationImplementation, FedotXGBoostRegressionImplementation
+    FedotXGBoostClassificationImplementation, FedotXGBoostRegressionImplementation, \
+    FedotLightGBMClassificationImplementation, FedotLightGBMRegressionImplementation
 from fedot.core.operations.operation_parameters import OperationParameters
 from fedot.core.repository.tasks import TaskTypesEnum
 from fedot.utilities.random import ImplementationRandomStateHandler
@@ -15,7 +16,9 @@ class BoostingStrategy(EvaluationStrategy):
         'catboost': FedotCatBoostClassificationImplementation,
         'catboostreg': FedotCatBoostRegressionImplementation,
         'xgboost': FedotXGBoostClassificationImplementation,
-        'xgboostreg': FedotXGBoostRegressionImplementation
+        'xgboostreg': FedotXGBoostRegressionImplementation,
+        'lgbm': FedotLightGBMClassificationImplementation,
+        'lgbmreg': FedotLightGBMRegressionImplementation
     }
 
     def __init__(self, operation_type: str, params: Optional[OperationParameters] = None):

diff --git a/.../core/operations/evaluation/operation_implementations/models/boostings_implementations.py b/.../core/operations/evaluation/operation_implementations/models/boostings_implementations.py
@@ -4,6 +4,8 @@
 import numpy as np
 import pandas as pd
 from catboost import CatBoostClassifier, CatBoostRegressor, Pool
+from lightgbm import LGBMClassifier, LGBMRegressor
+from lightgbm import early_stopping as lgbm_early_stopping
 from matplotlib import pyplot as plt
 from xgboost import XGBClassifier, XGBRegressor
 
@@ -20,14 +22,17 @@ class FedotXGBoostImplementation(ModelImplementation):
     def __init__(self, params: Optional[OperationParameters] = None):
         super().__init__(params)
 
+        self.check_and_update_params()
+
         self.model_params = {k: v for k, v in self.params.to_dict().items() if k not in self.__operation_params}
         self.model = None
         self.features_names = None
 
     def fit(self, input_data: InputData):
+        self.features_names = input_data.features_names
+
         if self.params.get('enable_categorical'):
             input_data = input_data.get_not_encoded_data()
-            self.features_names = input_data.features_names
 
         if self.params.get('use_eval_set'):
             train_input, eval_input = train_test_data_setup(input_data)
@@ -123,7 +128,7 @@ def predict_proba(self, input_data: InputData):
             input_data = input_data.get_not_encoded_data()
 
         input_data = self.convert_to_dataframe(input_data, self.params.get('enable_categorical'))
-        train_x, _ = input_data.drop(columns=['target']), input_data['target']
+        train_x = input_data.drop(columns=['target'])
         prediction = self.model.predict_proba(train_x)
         return prediction
 
@@ -135,8 +140,139 @@ def __init__(self, params: Optional[OperationParameters] = None):
         self.model = XGBRegressor(**self.model_params)
 
 
+class FedotLightGBMImplementation(ModelImplementation):
+    """Common fit/predict logic shared by the LightGBM classification and regression wrappers."""
+    __operation_params = ['n_jobs', 'use_eval_set', 'enable_categorical']
+
+    def __init__(self, params: Optional[OperationParameters] = None):
+        super().__init__(params)
+        # Adjust the params first so that the ``model_params`` snapshot below sees the result.
+        self.check_and_update_params()
+        # Keys listed in ``__operation_params`` are kept out of the estimator keyword arguments.
+        self.model_params = {k: v for k, v in self.params.to_dict().items() if k not in self.__operation_params}
+        self.model = None
+        self.features_names = None
+
+    def fit(self, input_data: InputData):
+        """Fit ``self.model`` on ``input_data`` and return the fitted estimator.
+
+        With ``use_eval_set`` the data is split by ``train_test_data_setup`` and the
+        second part is handed to the estimator as its evaluation set.
+        """
+        self.features_names = input_data.features_names
+        if self.params.get('enable_categorical'):
+            input_data = input_data.get_not_encoded_data()
+
+        if self.params.get('use_eval_set'):
+            train_input, eval_input = train_test_data_setup(input_data)
+            train_input = self.convert_to_dataframe(train_input, identify_cats=self.params.get('enable_categorical'))
+            eval_input = self.convert_to_dataframe(eval_input, identify_cats=self.params.get('enable_categorical'))
+            train_x, train_y = train_input.drop(columns=['target']), train_input['target']
+            eval_x, eval_y = eval_input.drop(columns=['target']), eval_input['target']
+
+            # ``classes_`` is provided by the subclasses (``None`` for the regression one).
+            eval_metric = self.set_eval_metric(self.classes_)
+            callbacks = self.update_callbacks()
+            self.model.fit(
+                X=train_x, y=train_y,
+                eval_set=[(eval_x, eval_y)], eval_metric=eval_metric,
+                callbacks=callbacks
+            )
+        else:
+            train_data = self.convert_to_dataframe(input_data, identify_cats=self.params.get('enable_categorical'))
+            train_x, train_y = train_data.drop(columns=['target']), train_data['target']
+            self.model.fit(X=train_x, y=train_y)
+
+        return self.model
+
+    def predict(self, input_data: InputData):
+        """Return the output of ``self.model.predict`` for the features of ``input_data``."""
+        if self.params.get('enable_categorical'):
+            input_data = input_data.get_not_encoded_data()
+
+        input_data = self.convert_to_dataframe(input_data, identify_cats=self.params.get('enable_categorical'))
+        train_x = input_data.drop(columns=['target'])
+        prediction = self.model.predict(train_x)
+        return prediction
+
+    def check_and_update_params(self):
+        """Reset ``early_stopping_rounds`` to ``False`` when it is an int but ``use_eval_set`` is off."""
+        early_stopping_rounds = self.params.get('early_stopping_rounds')
+        use_eval_set = self.params.get('use_eval_set')
+        if isinstance(early_stopping_rounds, int) and not use_eval_set:
+            self.params.update(early_stopping_rounds=False)
+
+    def update_callbacks(self) -> list:
+        """Return the callbacks for ``fit``: early stopping when a round count is configured."""
+        callback = []
+        esr = self.params.get('early_stopping_rounds')
+        # ``bool`` is an ``int`` subclass, so ``False``/``True`` must not be taken for a round count.
+        if isinstance(esr, int) and not isinstance(esr, bool):
+            callback.append(lgbm_early_stopping(esr, verbose=self.params.get('verbose')))
+        return callback
+
+    @staticmethod
+    def set_eval_metric(n_classes):
+        """Choose the ``eval_metric`` string from the class labels (``None`` means regression)."""
+        if n_classes is None:  # if n_classes is None -> regression
+            eval_metric = ''
+
+        elif len(n_classes) < 3:  # if n_classes < 3 -> bin class
+            eval_metric = 'binary_logloss'
+
+        else:  # else multiclass
+            eval_metric = 'multi_logloss'
+        return eval_metric
+
+    @staticmethod
+    def convert_to_dataframe(data: Optional[InputData], identify_cats: bool):
+        """Build a DataFrame from the features of ``data`` plus a ``'target'`` column."""
+        dataframe = pd.DataFrame(data=data.features, columns=data.features_names)
+        dataframe['target'] = data.target
+        # Categorical columns get the 'category' dtype only when ``identify_cats`` is set.
+        if identify_cats and data.categorical_idx is not None:
+            for col in dataframe.columns[data.categorical_idx]:
+                dataframe[col] = dataframe[col].astype('category')
+        # Numerical columns are always cast to float.
+        if data.numerical_idx is not None:
+            for col in dataframe.columns[data.numerical_idx]:
+                dataframe[col] = dataframe[col].astype('float')
+        return dataframe
+
+    def plot_feature_importance(self):
+        """Plot ``self.model.feature_importances_`` against the stored feature names."""
+        plot_feature_importance(self.features_names, self.model.feature_importances_)
+
+
+class FedotLightGBMClassificationImplementation(FedotLightGBMImplementation):
+    """LightGBM classifier wrapper that records the class labels seen during ``fit``."""
+    def __init__(self, params: Optional[OperationParameters] = None):
+        super().__init__(params)
+        self.classes_ = None
+        self.model = LGBMClassifier(**self.model_params)
+
+    def fit(self, input_data: InputData):
+        labels = np.array(input_data.target)
+        self.classes_ = np.unique(labels)
+        return super().fit(input_data=input_data)
+
+    def predict_proba(self, input_data: InputData):
+        """Return the output of ``self.model.predict_proba`` for the features of ``input_data``."""
+        if self.params.get('enable_categorical'):
+            input_data = input_data.get_not_encoded_data()
+        frame = self.convert_to_dataframe(input_data, self.params.get('enable_categorical'))
+        return self.model.predict_proba(frame.drop(columns=['target']))
+
+
+class FedotLightGBMRegressionImplementation(FedotLightGBMImplementation):
+    """LightGBM regressor wrapper; ``classes_`` is kept as ``None``."""
+    def __init__(self, params: Optional[OperationParameters] = None):
+        super().__init__(params)
+        self.classes_, self.model = None, LGBMRegressor(**self.model_params)
+
+
 class FedotCatBoostImplementation(ModelImplementation):
-    __operation_params = ['use_eval_set', 'n_jobs']
+    __operation_params = ['n_jobs', 'use_eval_set', 'enable_categorical']
 
     def __init__(self, params: Optional[OperationParameters] = None):
         super().__init__(params)
@@ -145,28 +281,35 @@ def __init__(self, params: Optional[OperationParameters] = None):
 
         self.model_params = {k: v for k, v in self.params.to_dict().items() if k not in self.__operation_params}
         self.model = None
+        self.features_names = None
 
     def fit(self, input_data: InputData):
-        input_data = input_data.get_not_encoded_data()
+        self.features_names = input_data.features_names
+
+        if self.params.get('enable_categorical'):
+            input_data = input_data.get_not_encoded_data()
 
         if self.params.get('use_eval_set'):
             # TODO: Using this method for tuning
             train_input, eval_input = train_test_data_setup(input_data)
 
-            train_input = self.convert_to_pool(train_input)
-            eval_input = self.convert_to_pool(eval_input)
+            train_input = self.convert_to_pool(train_input, identify_cats=self.params.get('enable_categorical'))
+            eval_input = self.convert_to_pool(eval_input, identify_cats=self.params.get('enable_categorical'))
 
             self.model.fit(X=train_input, eval_set=eval_input)
 
         else:
-            train_input = self.convert_to_pool(input_data)
+            train_input = self.convert_to_pool(input_data, identify_cats=self.params.get('enable_categorical'))
 
             self.model.fit(train_input)
 
         return self.model
 
     def predict(self, input_data: InputData):
-        prediction = self.model.predict(input_data.get_not_encoded_data().features)
+        if self.params.get('enable_categorical'):
+            input_data = input_data.get_not_encoded_data()
+
+        prediction = self.model.predict(input_data.features)
 
         return prediction
 
@@ -182,11 +325,11 @@ def check_and_update_params(self):
             self.params.update(use_best_model=False, early_stopping_rounds=False)
 
     @staticmethod
-    def convert_to_pool(data: Optional[InputData]):
+    def convert_to_pool(data: Optional[InputData], identify_cats: bool):
         return Pool(
             data=data.features,
             label=data.target,
-            cat_features=data.categorical_idx,
+            cat_features=data.categorical_idx if identify_cats else None,
             feature_names=data.features_names.tolist() if data.features_names is not None else None
         )
 
@@ -217,7 +360,10 @@ def fit(self, input_data: InputData):
         return super().fit(input_data=input_data)
 
     def predict_proba(self, input_data: InputData):
-        prediction = self.model.predict_proba(input_data.get_not_encoded_data().features)
+        if self.params.get('enable_categorical'):
+            input_data = input_data.get_not_encoded_data()
+
+        prediction = self.model.predict_proba(input_data.features)
         return prediction
 
 

diff --git a/fedot/core/pipelines/tuning/search_space.py b/fedot/core/pipelines/tuning/search_space.py
@@ -620,10 +620,30 @@ def get_parameters_dict(self):
                     'hyperopt-dist': hp.uniformint,
                     'sampling-scope': [2, 256],
                     'type': 'discrete'},
+                'min_data_in_leaf': {
+                    'hyperopt-dist': hp.uniformint,
+                    'sampling-scope': [5, 100],
+                    'type': 'discrete'},
+                'bagging_fraction': {
+                    'hyperopt-dist': hp.loguniform,
+                    'sampling-scope': [0.01, 1.0],
+                    'type': 'continuous'},
+                'extra_trees': {
+                    'hyperopt-dist': hp.choice,
+                    'sampling-scope': [[True, False]],
+                    'type': 'categorical'},
                 'learning_rate': {
                     'hyperopt-dist': hp.loguniform,
                     'sampling-scope': [0.01, 0.2],
                     'type': 'continuous'},
+                'force_col_wise': {
+                    'hyperopt-dist': hp.choice,
+                    'sampling-scope': [[True, False]],
+                    'type': 'categorical'},
+                'force_row_wise ': {
+                    'hyperopt-dist': hp.choice,
+                    'sampling-scope': [[True, False]],
+                    'type': 'categorical'},
                 'colsample_bytree': {
                     'hyperopt-dist': hp.uniform,
                     'sampling-scope': [0.4, 1],
@@ -639,17 +659,45 @@ def get_parameters_dict(self):
                 'reg_lambda': {
                     'hyperopt-dist': hp.loguniform,
                     'sampling-scope': [1e-8, 10],
-                    'type': 'continuous'}
+                    'type': 'continuous'},
+                'early_stopping_rounds': {
+                    'hyperopt-dist': hp.uniformint,
+                    'sampling-scope': [5, 50],
+                    'type': 'discrete'},
             },
             'lgbmreg': {
+                'boosting_type': {
+                    'hyperopt-dist': hp.choice,
+                    'sampling-scope': [['gbdt', 'dart', 'goss']],
+                    'type': 'categorical'},
                 'num_leaves': {
                     'hyperopt-dist': hp.uniformint,
                     'sampling-scope': [2, 256],
                     'type': 'discrete'},
+                'min_data_in_leaf': {
+                    'hyperopt-dist': hp.uniformint,
+                    'sampling-scope': [5, 100],
+                    'type': 'discrete'},
+                'bagging_fraction': {
+                    'hyperopt-dist': hp.loguniform,
+                    'sampling-scope': [0.01, 1.0],
+                    'type': 'continuous'},
+                'extra_trees': {
+                    'hyperopt-dist': hp.choice,
+                    'sampling-scope': [[True, False]],
+                    'type': 'categorical'},
                 'learning_rate': {
                     'hyperopt-dist': hp.loguniform,
                     'sampling-scope': [0.01, 0.2],
                     'type': 'continuous'},
+                'force_col_wise': {
+                    'hyperopt-dist': hp.choice,
+                    'sampling-scope': [[True, False]],
+                    'type': 'categorical'},
+                'force_row_wise ': {
+                    'hyperopt-dist': hp.choice,
+                    'sampling-scope': [[True, False]],
+                    'type': 'categorical'},
                 'colsample_bytree': {
                     'hyperopt-dist': hp.uniform,
                     'sampling-scope': [0.4, 1],
@@ -665,7 +713,18 @@ def get_parameters_dict(self):
                 'reg_lambda': {
                     'hyperopt-dist': hp.loguniform,
                     'sampling-scope': [1e-8, 10],
-                    'type': 'continuous'}
+                    'type': 'continuous'},
+                'objective': {
+                    'hyperopt-dist': hp.choice,
+                    'sampling-scope': [
+                        ['regression', 'regression_l1', 'huber', 'fair',
+                         'poisson', 'quantile', 'mape', 'tweedie', 'gamma']
+                    ],
+                    'type': 'categorical'},
+                'early_stopping_rounds': {
+                    'hyperopt-dist': hp.uniformint,
+                    'sampling-scope': [5, 50],
+                    'type': 'discrete'},
             },
             'catboost': {
                 'iterations': {