From 1aa0b9e269557c4fc9306231048cb22e0c4de6d7 Mon Sep 17 00:00:00 2001
From: fonhorst <fonhorst@alipoov.nb@gmail.com>
Date: Wed, 2 Aug 2023 19:29:24 +0300
Subject: [PATCH] fixing all pre-commit checks

---
 sparklightautoml/ml_algo/boost_lgbm.py | 11 +++++++----
 tests/spark/unit/dataset_utils.py      | 15 ++++-----------
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/sparklightautoml/ml_algo/boost_lgbm.py b/sparklightautoml/ml_algo/boost_lgbm.py
index 84da2aa..4bda4f6 100644
--- a/sparklightautoml/ml_algo/boost_lgbm.py
+++ b/sparklightautoml/ml_algo/boost_lgbm.py
@@ -473,10 +473,13 @@ def fit_predict_single_fold(
 
         rows_count = full_data.count()
         if (run_params["executionMode"] == "streaming") and (rows_count <= 25_000):
-            warnings.warn(f"The fitting of lightgbm in streaming execution mode "
-                          f"may fail with SEGSIGV / SIGBUS error (probably due to a bug in synapse ml) "
-                          f"if too few data available per core. Train data rows count: {rows_count} "
-                          f"Consider switching to bulk execution mode if such crashes happen", RuntimeWarning)
+            warnings.warn(
+                f"The fitting of lightgbm in streaming execution mode "
+                f"may fail with SEGSIGV / SIGBUS error (probably due to a bug in synapse ml) "
+                f"if too few data available per core. Train data rows count: {rows_count} "
+                f"Consider switching to bulk execution mode if such crashes happen",
+                RuntimeWarning,
+            )
 
         # fitting the model
         ml_model = lgbm.fit(self._assembler.transform(full_data))
diff --git a/tests/spark/unit/dataset_utils.py b/tests/spark/unit/dataset_utils.py
index c6e85a6..d966509 100644
--- a/tests/spark/unit/dataset_utils.py
+++ b/tests/spark/unit/dataset_utils.py
@@ -1,6 +1,7 @@
 import os
 import pickle
 import shutil
+
 from typing import Any
 from typing import Dict
 from typing import List
@@ -8,12 +9,14 @@
 from typing import Tuple
 
 import pyspark.sql.functions as sf
+
 from pyspark.sql import SparkSession
 
 from sparklightautoml.dataset.base import PersistenceManager
 from sparklightautoml.dataset.base import SparkDataset
 from sparklightautoml.tasks.base import SparkTask
 
+
 DUMP_METADATA_NAME = "metadata.pickle"
 DUMP_DATA_NAME = "data.parquet"
 
@@ -117,7 +120,6 @@ def load_dump_if_exist(
             "franchise_dealer": "str",
         },
     },
-
     "used_cars_dataset_no_cols_limit": {
         "path": "examples/data/small_used_cars_data.csv",
         "task_type": "reg",
@@ -140,7 +142,6 @@ def load_dump_if_exist(
             "franchise_dealer": "str",
         },
     },
-
     "lama_test_dataset": {
         "path": "examples/data/sampled_app_train.csv",
         "task_type": "binary",
@@ -148,7 +149,6 @@ def load_dump_if_exist(
         "target_col": "TARGET",
         "roles": {"target": "TARGET", "drop": ["SK_ID_CURR"]},
     },
-
     # https://www.openml.org/d/734
     "ailerons_dataset": {
         "path": "examples/data/ailerons.csv",
@@ -157,7 +157,6 @@ def load_dump_if_exist(
         "target_col": "binaryClass",
         "roles": {"target": "binaryClass"},
     },
-
     # https://www.openml.org/d/4534
     "phishing_websites_dataset": {
         "path": "examples/data/PhishingWebsites.csv",
@@ -166,7 +165,6 @@ def load_dump_if_exist(
         "target_col": "Result",
         "roles": {"target": "Result"},
     },
-
     # https://www.openml.org/d/981
     "kdd_internet_usage": {
         "path": "examples/data/kdd_internet_usage.csv",
@@ -175,7 +173,6 @@ def load_dump_if_exist(
         "target_col": "Who_Pays_for_Access_Work",
         "roles": {"target": "Who_Pays_for_Access_Work"},
     },
-
     # https://www.openml.org/d/42821
     "nasa_dataset": {
         "path": "examples/data/nasa_phm2008.csv",
@@ -184,7 +181,6 @@ def load_dump_if_exist(
         "target_col": "class",
         "roles": {"target": "class"},
     },
-
     # https://www.openml.org/d/4549
     "buzz_dataset": {
         "path": "examples/data/Buzzinsocialmedia_Twitter_25k.csv",
@@ -193,7 +189,6 @@ def load_dump_if_exist(
         "target_col": "Annotation",
         "roles": {"target": "Annotation"},
     },
-
     # https://www.openml.org/d/372
     "internet_usage": {
         "path": "examples/data/internet_usage.csv",
@@ -202,7 +197,6 @@ def load_dump_if_exist(
         "target_col": "Actual_Time",
         "roles": {"target": "Actual_Time"},
     },
-
     # https://www.openml.org/d/4538
     "gesture_segmentation": {
         "path": "examples/data/gesture_segmentation.csv",
@@ -211,7 +205,6 @@ def load_dump_if_exist(
         "target_col": "Phase",
         "roles": {"target": "Phase"},
     },
-
     # https://www.openml.org/d/382
     "ipums_97": {
         "path": "examples/data/ipums_97.csv",
@@ -219,7 +212,7 @@ def load_dump_if_exist(
         "metric_name": "crossentropy",
         "target_col": "movedin",
         "roles": {"target": "movedin"},
-    }
+    },
 }