From 1aa0b9e269557c4fc9306231048cb22e0c4de6d7 Mon Sep 17 00:00:00 2001 From: fonhorst Date: Wed, 2 Aug 2023 19:29:24 +0300 Subject: [PATCH] fixing all pre-commit checks --- sparklightautoml/ml_algo/boost_lgbm.py | 11 +++++++---- tests/spark/unit/dataset_utils.py | 15 ++++----------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/sparklightautoml/ml_algo/boost_lgbm.py b/sparklightautoml/ml_algo/boost_lgbm.py index 84da2aa..4bda4f6 100644 --- a/sparklightautoml/ml_algo/boost_lgbm.py +++ b/sparklightautoml/ml_algo/boost_lgbm.py @@ -473,10 +473,13 @@ def fit_predict_single_fold( rows_count = full_data.count() if (run_params["executionMode"] == "streaming") and (rows_count <= 25_000): - warnings.warn(f"The fitting of lightgbm in streaming execution mode " - f"may fail with SEGSIGV / SIGBUS error (probably due to a bug in synapse ml) " - f"if too few data available per core. Train data rows count: {rows_count} " - f"Consider switching to bulk execution mode if such crashes happen", RuntimeWarning) + warnings.warn( + f"The fitting of lightgbm in streaming execution mode " + f"may fail with SEGSIGV / SIGBUS error (probably due to a bug in synapse ml) " + f"if too few data available per core. Train data rows count: {rows_count} " + f"Consider switching to bulk execution mode if such crashes happen", + RuntimeWarning, + ) # fitting the model ml_model = lgbm.fit(self._assembler.transform(full_data)) diff --git a/tests/spark/unit/dataset_utils.py b/tests/spark/unit/dataset_utils.py index c6e85a6..d966509 100644 --- a/tests/spark/unit/dataset_utils.py +++ b/tests/spark/unit/dataset_utils.py @@ -1,6 +1,7 @@ import os import pickle import shutil + from typing import Any from typing import Dict from typing import List @@ -8,12 +9,14 @@ from typing import Tuple import pyspark.sql.functions as sf + from pyspark.sql import SparkSession from sparklightautoml.dataset.base import PersistenceManager from sparklightautoml.dataset.base import SparkDataset from sparklightautoml.tasks.base import SparkTask + DUMP_METADATA_NAME = "metadata.pickle" DUMP_DATA_NAME = "data.parquet" @@ -117,7 +120,6 @@ def load_dump_if_exist( "franchise_dealer": "str", }, }, - "used_cars_dataset_no_cols_limit": { "path": "examples/data/small_used_cars_data.csv", "task_type": "reg", @@ -140,7 +142,6 @@ def load_dump_if_exist( "franchise_dealer": "str", }, }, - "lama_test_dataset": { "path": "examples/data/sampled_app_train.csv", "task_type": "binary", @@ -148,7 +149,6 @@ def load_dump_if_exist( "target_col": "TARGET", "roles": {"target": "TARGET", "drop": ["SK_ID_CURR"]}, }, - # https://www.openml.org/d/734 "ailerons_dataset": { "path": "examples/data/ailerons.csv", @@ -157,7 +157,6 @@ def load_dump_if_exist( "target_col": "binaryClass", "roles": {"target": "binaryClass"}, }, - # https://www.openml.org/d/4534 "phishing_websites_dataset": { "path": "examples/data/PhishingWebsites.csv", @@ -166,7 +165,6 @@ def load_dump_if_exist( "target_col": "Result", "roles": {"target": "Result"}, }, - # https://www.openml.org/d/981 "kdd_internet_usage": { "path": "examples/data/kdd_internet_usage.csv", @@ -175,7 +173,6 @@ def load_dump_if_exist( "target_col": "Who_Pays_for_Access_Work", "roles": {"target": "Who_Pays_for_Access_Work"}, }, - # https://www.openml.org/d/42821 "nasa_dataset": { "path": "examples/data/nasa_phm2008.csv", @@ -184,7 +181,6 @@ def load_dump_if_exist( "target_col": "class", "roles": {"target": "class"}, }, - # https://www.openml.org/d/4549 "buzz_dataset": { "path": "examples/data/Buzzinsocialmedia_Twitter_25k.csv", @@ -193,7 +189,6 @@ def load_dump_if_exist( "target_col": "Annotation", "roles": {"target": "Annotation"}, }, - # https://www.openml.org/d/372 "internet_usage": { "path": "examples/data/internet_usage.csv", @@ -202,7 +197,6 @@ def load_dump_if_exist( "target_col": "Actual_Time", "roles": {"target": "Actual_Time"}, }, - # https://www.openml.org/d/4538 "gesture_segmentation": { "path": "examples/data/gesture_segmentation.csv", @@ -211,7 +205,6 @@ def load_dump_if_exist( "target_col": "Phase", "roles": {"target": "Phase"}, }, - # https://www.openml.org/d/382 "ipums_97": { "path": "examples/data/ipums_97.csv", @@ -219,7 +212,7 @@ def load_dump_if_exist( "metric_name": "crossentropy", "target_col": "movedin", "roles": {"target": "movedin"}, - } + }, }