check test
MorrisNein committed Dec 4, 2023
1 parent 0aa8acf commit 17073c1
Showing 25 changed files with 127 additions and 65 deletions.
2 changes: 1 addition & 1 deletion fedot/core/data/data.py
@@ -4,7 +4,7 @@
import os
from copy import copy, deepcopy
from dataclasses import dataclass, field
-from typing import Any, Iterable, List, Optional, Tuple, Union
+from typing import Any, Iterable, List, Optional, Tuple, Union, overload

import numpy as np
import pandas as pd
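Note: this hunk only adds overload to the typing imports; the overloaded signatures themselves sit further down in data.py, outside this excerpt. For context, a minimal sketch of the typing.overload pattern — the function below is illustrative, not FEDOT's actual API:

from typing import Union, overload

@overload
def to_number(value: int) -> int: ...
@overload
def to_number(value: str) -> float: ...

def to_number(value: Union[int, str]) -> Union[int, float]:
    # The @overload stubs exist only for the type checker;
    # a single runtime implementation serves both signatures.
    return value if isinstance(value, int) else float(value)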
94 changes: 47 additions & 47 deletions test/data/expected_metric_values.json
@@ -2,64 +2,64 @@
"complexity": {
"node_number": 0.4,
"structural": 0.43333333333333335,
"computation_time_in_seconds": 0.001
"computation_time_in_seconds": 0.0
},
"binary": {
"roc_auc": -0.9967141292442498,
"precision": -0.961038961038961,
"f1": -0.961038961038961,
"neg_log_loss": 0.12480364983668737,
"roc_auc_pen": -0.9923950346841914,
"accuracy": -0.9625
"roc_auc": -0.9799498746867168,
"precision": -0.9473684210526315,
"f1": -0.9473684210526315,
"neg_log_loss": 0.19864695688257933,
"roc_auc_pen": -0.9757034252297411,
"accuracy": -0.95
},
"multiclass": {
"roc_auc": -0.9973393347059444,
"precision": -0.9667403140534997,
"f1": -0.9648282401280049,
"neg_log_loss": 0.12313713353516573,
"roc_auc_pen": -0.9930175309222187,
"accuracy": -0.9647887323943662
"roc_auc": -0.9832500832500832,
"precision": -0.9777777777777779,
"f1": -0.9719701552732407,
"neg_log_loss": 0.17094588819131074,
"roc_auc_pen": -0.9789893328893329,
"accuracy": -0.9722222222222222
},
"regression": {
"rmse": 52.66625041489969,
"mse": 2773.7339327649224,
"neg_mean_squared_log_error": 0.1816505113919213,
"mape": 0.4054093661550918,
"smape": 51.74454568467751,
"mae": 42.70067250366354,
"r2": 0.5213672450946683,
"rmse_pen": 52.77158291572949
"rmse": 52.5400204534369,
"mse": 2760.4537492475683,
"neg_mean_squared_log_error": 0.11968905129112402,
"mape": 0.32791432529967834,
"smape": 49.48675123994897,
"mae": 41.55089094096007,
"r2": 0.389822739375963,
"rmse_pen": 52.64510049434378
},
"multitarget": {
"rmse": 4.242740603698809,
"mse": 34.447243787575864,
"neg_mean_squared_log_error": 0.0016149396995723982,
"mape": 0.024811474274980972,
"smape": 2.460824128804083,
"mae": 2.792325756927689,
"r2": 0.8938302810224498,
"rmse_pen": 4.2512260849062065
"rmse": 15.753366859480218,
"mse": 377.5025166058113,
"neg_mean_squared_log_error": 0.030627538521796293,
"mape": 0.15337090733886807,
"smape": 14.144394353302935,
"mae": 13.50645038033778,
"r2": -2.9713973901034954,
"rmse_pen": 15.784873593199178
},
"ts": {
"mase": 0.15114880404154094,
"rmse": 1.5387331777721858,
"mse": 2.367699792376889,
"neg_mean_squared_log_error": 0.00038988885997187243,
"mape": 0.016605915265088925,
"smape": 1.6566004975287463,
"mae": 1.3285184355230175,
"r2": 0.9923997085574247,
"rmse_pen": 1.5418106441277302
"mase": 0.6843245198500604,
"rmse": 5.4736585925929555,
"mse": 29.960938388266698,
"neg_mean_squared_log_error": 0.003868023869030223,
"mape": 0.061056201902745834,
"smape": 5.900193167286466,
"mae": 4.86630769671154,
"r2": 0.8851235060455247,
"rmse_pen": 5.484605909778141
},
"multits": {
"mase": 0.1509174893096649,
"rmse": 1.5354969862068721,
"mse": 2.357750994650387,
"neg_mean_squared_log_error": 0.00038897208639900315,
"mape": 0.01659010099781985,
"smape": 1.655093261482032,
"mae": 1.3264853007744228,
"r2": 0.9924316440935379,
"rmse_pen": 1.538567980179286
"mase": 0.6080543658437002,
"rmse": 6.977578875807477,
"mse": 48.68660696811473,
"neg_mean_squared_log_error": 0.005434854312738277,
"mape": 0.05898285612710732,
"smape": 5.603672314933528,
"mae": 4.960443510830185,
"r2": 0.8502227066753377,
"rmse_pen": 6.991534033559091
}
}
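These are golden metric values regenerated by the tests below; they are compared with np.isclose using rtol=0.001 and atol=0.001. As a reminder of what that tolerance means, np.isclose(a, b, rtol, atol) passes iff |a - b| <= atol + rtol * |b|:

import numpy as np

expected = -0.9799498746867168  # 'binary' -> 'roc_auc' from the JSON above
actual = -0.9800                # off by ~5e-5, well inside the tolerance
assert np.isclose(actual, expected, rtol=0.001, atol=0.001)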
Binary file added test/data/temp/binary_pred.pkl
Binary file added test/data/temp/binary_pred_accuracy.pkl
Binary file added test/data/temp/binary_pred_f1.pkl
Binary file added test/data/temp/binary_pred_neg_log_loss.pkl
Binary file added test/data/temp/binary_pred_precision.pkl
Binary file added test/data/temp/binary_pred_roc_auc.pkl
Binary file added test/data/temp/binary_pred_roc_auc_pen.pkl
Binary file added test/data/temp/binary_test.pkl
Binary file added test/data/temp/binary_test_accuracy.pkl
Binary file added test/data/temp/binary_test_f1.pkl
Binary file added test/data/temp/binary_test_neg_log_loss.pkl
Binary file added test/data/temp/binary_test_precision.pkl
Binary file added test/data/temp/binary_test_roc_auc.pkl
Binary file added test/data/temp/binary_test_roc_auc_pen.pkl
Binary file added test/data/temp/binary_train.pkl
Binary file added test/data/temp/binary_train_accuracy.pkl
Binary file added test/data/temp/binary_train_f1.pkl
Binary file added test/data/temp/binary_train_neg_log_loss.pkl
Binary file added test/data/temp/binary_train_precision.pkl
Binary file added test/data/temp/binary_train_roc_auc.pkl
Binary file added test/data/temp/binary_train_roc_auc_pen.pkl
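These pickles are golden artifacts for the new test_binary_classification below: per metric, the binary train/test splits and the pipeline prediction are dumped once and loaded on later runs to pin the data down. A minimal sketch of that round-trip, with stand-in objects (the real test gates writing on its update_expected_values flag rather than on file existence):

import pickle
from pathlib import Path

pred = [0, 1, 1, 0]  # stand-in for the pipeline's prediction
path = Path('binary_pred_roc_auc.pkl')  # mirrors one of the files added above
if not path.exists():
    with open(path, 'wb') as f:
        pickle.dump(pred, f)        # first run: record the golden artifact
with open(path, 'rb') as f:
    expected_pred = pickle.load(f)  # later runs: load the stored copy
assert pred == expected_pred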
94 changes: 78 additions & 16 deletions test/unit/composer/test_metrics.py
@@ -1,4 +1,5 @@
import json
+import pickle
from itertools import product
from typing import Callable, Dict, Tuple, Union

@@ -23,6 +24,7 @@
@pytest.fixture(scope='session')
def data_setup(request):
    task_type = request.param
+    validation_blocks = None
    if task_type in ('binary', 'complexity'):
        x, y = load_breast_cancer(return_X_y=True)
        task = Task(TaskTypesEnum.classification)
@@ -43,15 +45,17 @@ def data_setup(request):
        file_path = fedot_project_root() / 'test/data/short_time_series.csv'
        df = pd.read_csv(file_path)
        x = y = df['sea_height'].to_numpy()
-        task = Task(TaskTypesEnum.ts_forecasting, TsForecastingParams(forecast_length=10))
+        task = Task(TaskTypesEnum.ts_forecasting, TsForecastingParams(forecast_length=5))
        data_type = DataTypesEnum.ts
+        validation_blocks = 2
    elif task_type == 'multits':
        file_path = fedot_project_root() / 'test/data/short_time_series.csv'
        df = pd.read_csv(file_path)
        x = df[['sea_height', 'sea_height']].to_numpy()
        y = df['sea_height'].to_numpy()
        task = Task(TaskTypesEnum.ts_forecasting, TsForecastingParams(forecast_length=10))
        data_type = DataTypesEnum.multi_ts
+        validation_blocks = 2
    else:
        raise ValueError(f'Unsupported task type: {task_type}')

@@ -64,8 +68,8 @@ def data_setup(request):
                           task=task,
                           data_type=data_type)
    # Train test split
-    train_data, test_data = train_test_data_setup(input_data)
-    return train_data, test_data, task_type
+    train_data, test_data = train_test_data_setup(input_data, validation_blocks=validation_blocks)
+    return train_data, test_data, task_type, validation_blocks


def get_classification_pipeline():
@@ -105,34 +109,92 @@ def expected_values() -> Dict[str, Dict[str, float]]:


@pytest.mark.parametrize(
-    'metric, pipeline_func, data_setup, validation_blocks',
+    'metric, pipeline_func, data_setup',
    [
-        *product(ComplexityMetricsEnum, [get_classification_pipeline], ['complexity'], [None]),
-        *product(ClassificationMetricsEnum, [get_classification_pipeline], ['binary', 'multiclass'], [None]),
-        *product(RegressionMetricsEnum, [get_regression_pipeline], ['regression', 'multitarget'], [None]),
-        *product(TimeSeriesForecastingMetricsEnum, [get_ts_pipeline], ['ts', 'multits'], [2])
+        *product(ComplexityMetricsEnum, [get_classification_pipeline], ['complexity']),
+        *product(ClassificationMetricsEnum, [get_classification_pipeline], ['multiclass']),
+        *product(RegressionMetricsEnum, [get_regression_pipeline], ['regression', 'multitarget']),
+        *product(TimeSeriesForecastingMetricsEnum, [get_ts_pipeline], ['ts', 'multits'])
    ],
    indirect=['data_setup']
)
def test_metrics(metric: ClassificationMetricsEnum, pipeline_func: Callable[[], Pipeline],
-                 validation_blocks: Union[int, None], data_setup: Tuple[InputData, InputData, str],
-                 expected_values: Dict[str, Dict[str, float]], update_expected_values: bool = False):
-    train, _, task_type = data_setup
+                 data_setup: Tuple[InputData, InputData, str, Union[int, None]],
+                 expected_values: Dict[str, Dict[str, float]], update_expected_values: bool = True):
+    train, test, task_type, validation_blocks = data_setup
+
+    pipeline = pipeline_func()
+    pipeline.fit(input_data=train)
+    metric_function = MetricsRepository.get_metric(metric)
+    metric_class = MetricsRepository.get_metric_class(metric)
+    metric_value = metric_function(pipeline=pipeline, reference_data=test, validation_blocks=validation_blocks)
+
+    if update_expected_values:
+        with open(fedot_project_root() / 'test/data/expected_metric_values.json', 'w') as f:
+            expected_values[task_type] = expected_values.get(task_type) or {}
+            expected_values[task_type][str(metric)] = metric_value
+            json.dump(expected_values, f, indent=2)
+
+    expected_value = expected_values[task_type][str(metric)]
+
+    assert np.isclose(metric_value, expected_value, rtol=0.001, atol=0.001)
+    assert not np.isclose(metric_value, metric_class.default_value, rtol=0.01, atol=0.01)
+
+
+@pytest.mark.parametrize(
+    'metric, pipeline_func, data_setup',
+    [
+        *product(ClassificationMetricsEnum, [get_classification_pipeline], ['binary']),
+    ],
+    indirect=['data_setup']
+)
+def test_binary_classification(metric: ClassificationMetricsEnum, pipeline_func: Callable[[], Pipeline],
+                               data_setup: Tuple[InputData, InputData, str, Union[int, None]],
+                               expected_values: Dict[str, Dict[str, float]], update_expected_values: bool = False):
+    train, test, task_type, validation_blocks = data_setup

    pipeline = pipeline_func()
    pipeline.fit(input_data=train)
    metric_function = MetricsRepository.get_metric(metric)
    metric_class = MetricsRepository.get_metric_class(metric)
-    metric_value = metric_function(pipeline=pipeline, reference_data=train, validation_blocks=validation_blocks)
+    metric_value = metric_function(pipeline=pipeline, reference_data=test, validation_blocks=validation_blocks)

    if update_expected_values:
        with open(fedot_project_root() / 'test/data/expected_metric_values.json', 'w') as f:
-            expected_values = dict(binary={}, multiclass={}, regression={}, multitarget={}, ts={}, multits={})
+            expected_values[task_type] = expected_values.get(task_type) or {}
            expected_values[task_type][str(metric)] = metric_value
-            json.dump(expected_values, f)
+            json.dump(expected_values, f, indent=2)

+    pred = pipeline.predict(test)
+    expected_data_path = fedot_project_root() / 'test' / 'data' / 'temp'
+    expected_train_path = expected_data_path / f'{task_type}_train_{metric}.pkl'
+    expected_test_path = expected_data_path / f'{task_type}_test_{metric}.pkl'
+    expected_pred_path = expected_data_path / f'{task_type}_pred_{metric}.pkl'
+    if update_expected_values:
+        with open(expected_train_path, 'wb') as f:
+            pickle.dump(train, f)
+        with open(expected_test_path, 'wb') as f:
+            pickle.dump(test, f)
+        with open(expected_pred_path, 'wb') as f:
+            pickle.dump(pred, f)
+        expected_train = train
+        expected_test = test
+        expected_pred = pred
+    else:
+        with open(expected_train_path, 'rb') as f:
+            expected_train = pickle.load(f)
+        with open(expected_test_path, 'rb') as f:
+            expected_test = pickle.load(f)
+        with open(expected_pred_path, 'rb') as f:
+            expected_pred = pickle.load(f)
+
    expected_value = expected_values[task_type][str(metric)]

+    assert (train.features == expected_train.features).all()
+    assert (np.ravel(train.target) == np.ravel(expected_train.target)).all()
+    assert (test.features == expected_test.features).all()
+    assert (np.ravel(test.target) == np.ravel(expected_test.target)).all()
+    assert (np.ravel(pred.predict) == np.ravel(expected_pred.predict)).all()
    assert np.isclose(metric_value, expected_value, rtol=0.001, atol=0.001)
    assert not np.isclose(metric_value, metric_class.default_value, rtol=0.01, atol=0.01)

@@ -149,7 +211,7 @@ def test_metrics(metric: ClassificationMetricsEnum, pipeline_func: Callable[[],
def test_ideal_case_metrics(metric: ClassificationMetricsEnum, pipeline_func: Callable[[], Pipeline],
                            validation_blocks: Union[int, None], data_setup: Tuple[InputData, InputData, str],
                            expected_values):
-    reference, _, task_type = data_setup
+    reference, _, task_type, _ = data_setup
    metric_class = MetricsRepository.get_metric_class(metric)
    predicted = OutputData(idx=reference.idx, task=reference.task, data_type=reference.data_type)
    if task_type == 'multiclass' and metric_class.output_mode != 'labels':
@@ -167,7 +229,7 @@ def test_ideal_case_metrics(metric: ClassificationMetricsEnum, pipeline_func: Callable[[],

@pytest.mark.parametrize('data_setup', ['multitarget'], indirect=True)
def test_predict_shape_multi_target(data_setup: Tuple[InputData, InputData, str]):
-    train, test, _ = data_setup
+    train, test, _, _ = data_setup
    simple_pipeline = Pipeline(PipelineNode('linear'))
    simple_pipeline.fit(input_data=train)

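A note on the validation_blocks values introduced in the fixture above: train_test_data_setup now receives them for the forecasting tasks, and with in-sample validation the held-out tail is presumably forecast_length * validation_blocks points long. The arithmetic below only illustrates that assumption; it is not FEDOT's actual splitting code:

import numpy as np

series = np.arange(50)  # stand-in for the 'sea_height' series
forecast_length, validation_blocks = 5, 2
horizon = forecast_length * validation_blocks  # 10 points held out, by assumption
train_part, holdout = series[:-horizon], series[-horizon:]
assert len(train_part) == 40 and len(holdout) == 10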
2 changes: 1 addition & 1 deletion test/unit/tasks/test_regression.py
@@ -103,7 +103,7 @@ def test_regression_pipeline_with_data_operation_fit_predict_correct():
@pytest.mark.parametrize('data_setup', ['multitarget'], indirect=True)
def test_multi_target_regression_composing_correct(data_setup):
    # Load simple dataset for multi-target
-    train, test, _ = data_setup
+    train, test, _, _ = data_setup

    problem = 'regression'
    timeout = 0.1
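Since data_setup now yields a four-element tuple, every consumer of the fixture needs the same unpacking change; unused slots are conventionally discarded with underscores:

data_setup = ('train_data', 'test_data', 'regression', None)  # stand-in for the fixture's return value
train, test, _, _ = data_setup  # task_type and validation_blocks are unused here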
